PyPI - markdown-to-confluence - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

markdown-to-confluence 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/METADATA +131 -14
markdown_to_confluence-0.4.0.dist-info/RECORD +25 -0
{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/WHEEL +1 -1
md2conf/__init__.py +1 -1
md2conf/__main__.py +18 -7
md2conf/api.py +492 -187
md2conf/application.py +100 -83
md2conf/collection.py +31 -0
md2conf/converter.py +51 -112
md2conf/emoji.py +28 -3
md2conf/extra.py +14 -0
md2conf/local.py +33 -45
md2conf/matcher.py +54 -13
md2conf/mermaid.py +10 -4
md2conf/metadata.py +1 -3
md2conf/processor.py +137 -43
md2conf/properties.py +24 -5
md2conf/scanner.py +149 -0
markdown_to_confluence-0.3.4.dist-info/RECORD +0 -22
{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.3.4.dist-info → markdown_to_confluence-0.4.0.dist-info}/zip-safe +0 -0

md2conf/application.py CHANGED Viewed

@@ -6,22 +6,20 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
-import hashlib
 import logging
 from pathlib import Path
 from typing import Optional
-from .api import ConfluencePage, ConfluenceSession
+from .api import ConfluenceLabel, ConfluenceSession
 from .converter import (
     ConfluenceDocument,
     ConfluenceDocumentOptions,
     ConfluencePageID,
     attachment_name,
-    extract_frontmatter_title,
-    extract_qualified_id,
 )
+from .extra import override
 from .metadata import ConfluencePageMetadata
-from .processor import Converter, Processor, ProcessorFactory
+from .processor import Converter, DocumentNode, Processor, ProcessorFactory
 from .properties import PageError
 LOGGER = logging.getLogger(__name__)
@@ -48,82 +46,78 @@ class SynchronizingProcessor(Processor):
         super().__init__(options, api.site, root_dir)
         self.api = api
-    def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        parent_id: Optional[ConfluencePageID],
-        *,
-        title: Optional[str] = None,
-    ) -> ConfluencePageMetadata:
-        """
-        Creates a new Confluence page if no page is linked in the Markdown document.
+    @override
+    def _synchronize_tree(
+        self, root: DocumentNode, root_id: Optional[ConfluencePageID]
+    ) -> None:
         """
+        Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
-        # parse file
-        with open(absolute_path, "r", encoding="utf-8") as f:
-            text = f.read()
+        Creates new Confluence pages as necessary, e.g. if no page is linked in the Markdown document, or no page is found with lookup by page title.
-        qualified_id, text = extract_qualified_id(text)
+        Updates the original Markdown document to add tags to associate the document with its corresponding Confluence page.
+        """
-        overwrite = False
-        if qualified_id is None:
-            # create new Confluence page
-            if parent_id is None:
+        if root.page_id is None and root_id is None:
+            raise PageError(
+                f"expected: root page ID in options, or explicit page ID in {root.absolute_path}"
+            )
+        elif root.page_id is not None and root_id is not None:
+            if root.page_id != root_id.page_id:
                 raise PageError(
-                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
+                    f"mismatched inferred page ID of {root_id.page_id} and explicit page ID in {root.absolute_path}"
                 )
-            # assign title from front-matter if present
-            if title is None:
-                title, _ = extract_frontmatter_title(text)
+            real_id = root_id
+        elif root_id is not None:
+            real_id = root_id
+        elif root.page_id is not None:
+            real_id = ConfluencePageID(root.page_id)
+        else:
+            raise NotImplementedError("condition not exhaustive")
-            # use file name (without extension) and path hash if no title is supplied
-            if title is None:
-                overwrite = True
-                relative_path = absolute_path.relative_to(self.root_dir)
-                hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
-                digest = "".join(f"{c:x}" for c in hash.digest())
-                title = f"{absolute_path.stem} [{digest}]"
+        self._synchronize_subtree(root, real_id)
-            confluence_page = self._create_page(absolute_path, text, title, parent_id)
+    def _synchronize_subtree(
+        self, node: DocumentNode, parent_id: ConfluencePageID
+    ) -> None:
+        if node.page_id is not None:
+            # verify if page exists
+            page = self.api.get_page_properties(node.page_id)
+            update = False
+        elif node.title is not None:
+            # look up page by title
+            page = self.api.get_or_create_page(node.title, parent_id.page_id)
+            update = True
         else:
-            # look up existing Confluence page
-            confluence_page = self.api.get_page(qualified_id.page_id)
-        space_key = (
-            self.api.space_id_to_key(confluence_page.space_id)
-            if confluence_page.space_id
-            else self.site.space_key
-        )
+            # always create a new page
+            digest = self._generate_hash(node.absolute_path)
+            title = f"{node.absolute_path.stem} [{digest}]"
+            page = self.api.create_page(parent_id.page_id, title, "")
+            update = True
+        space_key = self.api.space_id_to_key(page.spaceId)
+        if update:
+            self._update_markdown(
+                node.absolute_path,
+                page_id=page.id,
+                space_key=space_key,
+            )
-        return ConfluencePageMetadata(
-            page_id=confluence_page.id,
+        data = ConfluencePageMetadata(
+            page_id=page.id,
             space_key=space_key,
-            title=confluence_page.title,
-            overwrite=overwrite,
+            title=page.title,
         )
+        self.page_metadata.add(node.absolute_path, data)
-    def _create_page(
-        self,
-        absolute_path: Path,
-        document: str,
-        title: str,
-        parent_id: ConfluencePageID,
-    ) -> ConfluencePage:
-        """
-        Creates a new Confluence page when Markdown file doesn't have an embedded page ID yet.
-        """
-        confluence_page = self.api.get_or_create_page(title, parent_id.page_id)
-        self._update_markdown(
-            absolute_path,
-            document,
-            confluence_page.id,
-            self.api.space_id_to_key(confluence_page.space_id),
-        )
-        return confluence_page
+        for child_node in node.children():
+            self._synchronize_subtree(child_node, ConfluencePageID(page.id))
-    def _save_document(self, document: ConfluenceDocument, path: Path) -> None:
+    @override
+    def _update_page(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
         """
         Saves a new version of a Confluence document.
@@ -133,37 +127,60 @@ class SynchronizingProcessor(Processor):
         base_path = path.parent
         for image in document.images:
             self.api.upload_attachment(
-                document.id.page_id,
+                page_id.page_id,
                 attachment_name(image),
                 attachment_path=base_path / image,
             )
         for name, data in document.embedded_images.items():
             self.api.upload_attachment(
-                document.id.page_id,
+                page_id.page_id,
                 name,
                 raw_data=data,
             )
         content = document.xhtml()
+        LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
-        # leave title as it is for existing pages, update title for pages with randomly assigned title
-        title = document.title if self.page_metadata[path].overwrite else None
+        title = None
+        if document.title is not None:
+            meta = self.page_metadata.get(path)
+            if (
+                meta is not None
+                and meta.space_key is not None
+                and meta.title != document.title
+            ):
+                conflicting_page_id = self.api.page_exists(
+                    document.title, space_id=self.api.space_key_to_id(meta.space_key)
+                )
+                if conflicting_page_id is None:
+                    title = document.title
+                else:
+                    LOGGER.info(
+                        "Document title of %s conflicts with Confluence page title of %s",
+                        path,
+                        conflicting_page_id,
+                    )
+        self.api.update_page(page_id.page_id, content, title=title)
+        if document.labels is not None:
+            self.api.update_labels(
+                page_id.page_id,
+                [
+                    ConfluenceLabel(name=label, prefix="global")
+                    for label in document.labels
+                ],
+            )
-        LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
-        self.api.update_page(document.id.page_id, content, title=title)
-    def _update_markdown(
-        self,
-        path: Path,
-        document: str,
-        page_id: str,
-        space_key: Optional[str],
-    ) -> None:
+    def _update_markdown(self, path: Path, *, page_id: str, space_key: str) -> None:
         """
         Writes the Confluence page ID and space key at the beginning of the Markdown file.
         """
+        with open(path, "r", encoding="utf-8") as file:
+            document = file.read()
         content: list[str] = []
         # check if the file has frontmatter
@@ -175,9 +192,7 @@ class SynchronizingProcessor(Processor):
             content.append(document[:index])
         content.append(f"<!-- confluence-page-id: {page_id} -->")
-        if space_key:
-            content.append(f"<!-- confluence-space-key: {space_key} -->")
+        content.append(f"<!-- confluence-space-key: {space_key} -->")
         content.append(document[index:])
         with open(path, "w", encoding="utf-8") as file:
@@ -200,6 +215,8 @@ class SynchronizingProcessorFactory(ProcessorFactory):
 class Application(Converter):
     """
     The entry point for Markdown to Confluence conversion.
+    This is the class instantiated by the command-line application.
     """
     def __init__(

md2conf/collection.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+Publish Markdown files to Confluence wiki.
+Copyright 2022-2025, Levente Hunyadi
+:see: https://github.com/hunyadi/md2conf
+"""
+from pathlib import Path
+from typing import Iterable, Optional
+from .metadata import ConfluencePageMetadata
+class ConfluencePageCollection:
+    _metadata: dict[Path, ConfluencePageMetadata]
+    def __init__(self) -> None:
+        self._metadata = {}
+    def __len__(self) -> int:
+        return len(self._metadata)
+    def add(self, path: Path, data: ConfluencePageMetadata) -> None:
+        self._metadata[path] = data
+    def get(self, path: Path) -> Optional[ConfluencePageMetadata]:
+        return self._metadata.get(path)
+    def items(self) -> Iterable[tuple[Path, ConfluencePageMetadata]]:
+        return self._metadata.items()

md2conf/converter.py CHANGED Viewed

@@ -18,16 +18,17 @@ import xml.etree.ElementTree
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
-from urllib.parse import ParseResult, urlparse, urlunparse
+from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
 import lxml.etree as ET
 import markdown
-import yaml
 from lxml.builder import ElementMaker
+from .collection import ConfluencePageCollection
 from .mermaid import render_diagram
-from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
+from .metadata import ConfluenceSiteMetadata
 from .properties import PageError
+from .scanner import ScannedDocument, Scanner
 namespaces = {
     "ac": "http://atlassian.com/content",
@@ -66,6 +67,19 @@ def is_relative_url(url: str) -> bool:
     return not bool(urlparts.scheme) and not bool(urlparts.netloc)
+def encode_title(text: str) -> str:
+    "Converts a title string such that it is safe to embed into a Confluence URL."
+    # replace unsafe characters with space
+    text = re.sub(r"[^A-Za-z0-9._~()'!*:@,;+?-]+", " ", text)
+    # replace multiple consecutive spaces with single space
+    text = re.sub(r"\s\s+", " ", text)
+    # URL-encode
+    return quote_plus(text.strip())
 def emoji_generator(
     index: str,
     shortname: str,
@@ -78,8 +92,10 @@ def emoji_generator(
     md: markdown.Markdown,
 ) -> xml.etree.ElementTree.Element:
     name = (alias or shortname).strip(":")
-    span = xml.etree.ElementTree.Element("span", {"data-emoji": name})
+    span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
     if uc is not None:
+        span.attrib["data-emoji-unicode"] = uc
         # convert series of Unicode code point hexadecimal values into characters
         span.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
     else:
@@ -349,7 +365,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
     images: list[Path]
     embedded_images: dict[str, bytes]
     site_metadata: ConfluenceSiteMetadata
-    page_metadata: dict[Path, ConfluencePageMetadata]
+    page_metadata: ConfluencePageCollection
     def __init__(
         self,
@@ -357,7 +373,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
         path: Path,
         root_dir: Path,
         site_metadata: ConfluenceSiteMetadata,
-        page_metadata: dict[Path, ConfluencePageMetadata],
+        page_metadata: ConfluencePageCollection,
     ) -> None:
         super().__init__()
         self.options = options
@@ -466,7 +482,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
                     "Confluence space key required for building full web URLs"
                 )
-            page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{link_metadata.title}"
+            page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
         components = ParseResult(
             scheme="https",
@@ -821,7 +837,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
         )
     def _transform_emoji(self, elem: ET._Element) -> ET._Element:
-        shortname = elem.attrib.get("data-emoji", "")
+        shortname = elem.attrib.get("data-emoji-shortname", "")
+        unicode = elem.attrib.get("data-emoji-unicode", None)
         alt = elem.text or ""
         # <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="&#128521;"/>
@@ -831,8 +848,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
             "emoticon",
             {
                 # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner
-                ET.QName(namespaces["ac"], "name"): "blue-star",
+                ET.QName(namespaces["ac"], "name"): shortname,
                 ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
+                ET.QName(namespaces["ac"], "emoji-id"): unicode,
                 ET.QName(namespaces["ac"], "emoji-fallback"): alt,
             },
         )
@@ -930,7 +948,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
         elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
             return self._transform_block(child[0])
-        elif child.tag == "span" and child.attrib.has_key("data-emoji"):
+        elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
             return self._transform_emoji(child)
         return None
@@ -949,78 +967,15 @@ class DocumentError(RuntimeError):
     "Raised when a converted Markdown document has an unexpected element or attribute."
-def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
-    values: list[str] = []
-    def _repl_func(matchobj: re.Match) -> str:
-        values.append(matchobj.group(1))
-        return ""
-    text = re.sub(pattern, _repl_func, text, 1, re.ASCII)
-    value = values[0] if values else None
-    return value, text
 @dataclass
 class ConfluencePageID:
     page_id: str
-    def __init__(self, page_id: str):
-        self.page_id = page_id
 @dataclass
 class ConfluenceQualifiedID:
     page_id: str
-    space_key: Optional[str] = None
-    def __init__(self, page_id: str, space_key: Optional[str] = None):
-        self.page_id = page_id
-        self.space_key = space_key
-def extract_qualified_id(text: str) -> tuple[Optional[ConfluenceQualifiedID], str]:
-    "Extracts the Confluence page ID and space key from a Markdown document."
-    page_id, text = extract_value(r"<!--\s+confluence-page-id:\s*(\d+)\s+-->", text)
-    if page_id is None:
-        return None, text
-    # extract Confluence space key
-    space_key, text = extract_value(r"<!--\s+confluence-space-key:\s*(\S+)\s+-->", text)
-    return ConfluenceQualifiedID(page_id, space_key), text
-def extract_frontmatter(text: str) -> tuple[Optional[str], str]:
-    "Extracts the front matter from a Markdown document."
-    return extract_value(r"(?ms)\A---$(.+?)^---$", text)
-def extract_frontmatter_title(text: str) -> tuple[Optional[str], str]:
-    frontmatter, text = extract_frontmatter(text)
-    title: Optional[str] = None
-    if frontmatter is not None:
-        properties = yaml.safe_load(frontmatter)
-        if isinstance(properties, dict):
-            property_title = properties.get("title")
-            if isinstance(property_title, str):
-                title = property_title
-    return title, text
-def read_qualified_id(absolute_path: Path) -> Optional[ConfluenceQualifiedID]:
-    "Reads the Confluence page ID and space key from a Markdown document."
-    with open(absolute_path, "r", encoding="utf-8") as f:
-        document = f.read()
-    qualified_id, _ = extract_qualified_id(document)
-    return qualified_id
+    space_key: str
 @dataclass
@@ -1055,8 +1010,8 @@ class ConversionError(RuntimeError):
 class ConfluenceDocument:
-    id: ConfluenceQualifiedID
     title: Optional[str]
+    labels: Optional[list[str]]
     links: list[str]
     images: list[Path]
@@ -1070,64 +1025,48 @@ class ConfluenceDocument:
         options: ConfluenceDocumentOptions,
         root_dir: Path,
         site_metadata: ConfluenceSiteMetadata,
-        page_metadata: dict[Path, ConfluencePageMetadata],
-    ) -> "ConfluenceDocument":
+        page_metadata: ConfluencePageCollection,
+    ) -> tuple[ConfluencePageID, "ConfluenceDocument"]:
         path = path.resolve(True)
-        with open(path, "r", encoding="utf-8") as f:
-            text = f.read()
+        document = Scanner().read(path)
-        # extract Confluence page ID
-        qualified_id, text = extract_qualified_id(text)
-        if qualified_id is None:
+        if document.page_id is not None:
+            page_id = ConfluencePageID(document.page_id)
+        else:
             # look up Confluence page ID in metadata
             metadata = page_metadata.get(path)
             if metadata is not None:
-                qualified_id = ConfluenceQualifiedID(
-                    metadata.page_id, metadata.space_key
-                )
-        if qualified_id is None:
-            raise PageError("missing Confluence page ID")
+                page_id = ConfluencePageID(metadata.page_id)
+            else:
+                raise PageError("missing Confluence page ID")
-        return ConfluenceDocument(
-            path, text, qualified_id, options, root_dir, site_metadata, page_metadata
+        return page_id, ConfluenceDocument(
+            path, document, options, root_dir, site_metadata, page_metadata
         )
     def __init__(
         self,
         path: Path,
-        text: str,
-        qualified_id: ConfluenceQualifiedID,
+        document: ScannedDocument,
         options: ConfluenceDocumentOptions,
         root_dir: Path,
         site_metadata: ConfluenceSiteMetadata,
-        page_metadata: dict[Path, ConfluencePageMetadata],
+        page_metadata: ConfluencePageCollection,
     ) -> None:
         self.options = options
-        self.id = qualified_id
-        # extract frontmatter
-        self.title, text = extract_frontmatter_title(text)
-        # extract 'generated-by' tag text
-        generated_by_tag, text = extract_value(
-            r"<!--\s+generated-by:\s*(.*)\s+-->", text
-        )
         # convert to HTML
-        html = markdown_to_html(text)
+        html = markdown_to_html(document.text)
         # parse Markdown document
         if self.options.generated_by is not None:
-            if generated_by_tag is not None:
-                generated_by_text = generated_by_tag
-            else:
-                generated_by_text = self.options.generated_by
+            generated_by = document.generated_by or self.options.generated_by
         else:
-            generated_by_text = None
+            generated_by = None
-        if generated_by_text is not None:
-            generated_by_html = markdown_to_html(generated_by_text)
+        if generated_by is not None:
+            generated_by_html = markdown_to_html(generated_by)
             content = [
                 '<ac:structured-macro ac:name="info" ac:schema-version="1">',
@@ -1161,8 +1100,8 @@ class ConfluenceDocument:
         self.images = converter.images
         self.embedded_images = converter.embedded_images
-        if self.title is None:
-            self.title = converter.toc.get_title()
+        self.title = document.title or converter.toc.get_title()
+        self.labels = document.tags
     def xhtml(self) -> str:
         return elements_to_string(self.root)
@@ -1214,7 +1153,7 @@ def _content_to_string(dtd_path: Path, content: str) -> str:
     data = [
         '<?xml version="1.0"?>',
-        f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path}">'
+        f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}">'
         f"<root{ns_attr_list}>",
     ]
     data.append(content)

md2conf/emoji.py CHANGED Viewed

@@ -10,7 +10,24 @@ import pathlib
 import pymdownx.emoji1_db as emoji_db
-EMOJI_PAGE_ID = "86918529216"
+EMOJI_PAGE_ID = "13500452"
+def to_html(cp: int) -> str:
+    """
+    Returns the safe HTML representation for a Unicode code point.
+    Converts non-ASCII and non-printable characters into HTML entities with decimal notation.
+    :param cp: Unicode code point.
+    :returns: An HTML representation of the Unicode character.
+    """
+    ch = chr(cp)
+    if ch.isascii() and ch.isalnum():
+        return ch
+    else:
+        return f"&#{cp};"
 def generate_source(path: pathlib.Path) -> None:
@@ -47,11 +64,19 @@ def generate_target(path: pathlib.Path) -> None:
         print("<thead><tr><th>Icon</th><th>Emoji code</th></tr></thead>", file=f)
         print("<tbody>", file=f)
         for key, data in emojis.items():
+            unicode = data["unicode"]
             key = key.strip(":")
-            unicode = "".join(f"&#x{item};" for item in data["unicode"].split("-"))
+            html = "".join(to_html(int(item, base=16)) for item in unicode.split("-"))
             print(
-                f'<tr><td><ac:emoticon ac:name="blue-star" ac:emoji-shortname=":{key}:" ac:emoji-fallback="{unicode}"/></td><td><code>:{key}:</code></td></tr>',
+                f"<tr>\n"
+                f"  <td>\n"
+                f'    <ac:emoticon ac:name="{key}" ac:emoji-shortname=":{key}:" ac:emoji-id="{unicode}" ac:emoji-fallback="{html}"/>\n'
+                f"  </td>\n"
+                f"  <td>\n"
+                f"    <code>:{key}:</code>\n"
+                f"  </td>\n"
+                f"</tr>",
                 file=f,
             )
         print("</tbody>", file=f)

md2conf/extra.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Publish Markdown files to Confluence wiki.
+Copyright 2022-2025, Levente Hunyadi
+:see: https://github.com/hunyadi/md2conf
+"""
+import sys
+if sys.version_info >= (3, 12):
+    from typing import override as override  # noqa: F401
+else:
+    from typing_extensions import override as override  # noqa: F401

markdown-to-confluence 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

markdown-to-confluence 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl