PyPI - markdown-to-confluence - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/METADATA +24 -11
markdown_to_confluence-0.3.5.dist-info/RECORD +23 -0
{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/WHEEL +1 -1
md2conf/__init__.py +1 -1
md2conf/__main__.py +6 -5
md2conf/api.py +235 -45
md2conf/application.py +100 -182
md2conf/converter.py +53 -112
md2conf/local.py +125 -0
md2conf/matcher.py +54 -13
md2conf/mermaid.py +10 -4
md2conf/metadata.py +42 -0
md2conf/processor.py +158 -90
md2conf/scanner.py +117 -0
markdown_to_confluence-0.3.3.dist-info/RECORD +0 -20
{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/zip-safe +0 -0

md2conf/application.py CHANGED Viewed

@@ -8,7 +8,6 @@ Copyright 2022-2025, Levente Hunyadi
 import hashlib
 import logging
-import os
 from pathlib import Path
 from typing import Optional
@@ -16,213 +15,78 @@ from .api import ConfluencePage, ConfluenceSession
 from .converter import (
     ConfluenceDocument,
     ConfluenceDocumentOptions,
-    ConfluencePageMetadata,
-    ConfluenceQualifiedID,
-    ConfluenceSiteMetadata,
+    ConfluencePageID,
     attachment_name,
-    extract_frontmatter_title,
-    extract_qualified_id,
-    read_qualified_id,
 )
-from .matcher import Matcher, MatcherOptions
-from .properties import ArgumentError, PageError
+from .metadata import ConfluencePageMetadata
+from .processor import Converter, Processor, ProcessorFactory
+from .properties import PageError
+from .scanner import Scanner
 LOGGER = logging.getLogger(__name__)
-class Application:
-    "The entry point for Markdown to Confluence conversion."
+class SynchronizingProcessor(Processor):
+    """
+    Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
+    """
     api: ConfluenceSession
-    options: ConfluenceDocumentOptions
     def __init__(
-        self, api: ConfluenceSession, options: ConfluenceDocumentOptions
-    ) -> None:
-        self.api = api
-        self.options = options
-    def synchronize(self, path: Path) -> None:
-        "Synchronizes a single Markdown page or a directory of Markdown pages."
-        path = path.resolve(True)
-        if path.is_dir():
-            self.synchronize_directory(path)
-        elif path.is_file():
-            self.synchronize_page(path)
-        else:
-            raise ArgumentError(f"expected: valid file or directory path; got: {path}")
-    def synchronize_page(
-        self, page_path: Path, root_dir: Optional[Path] = None
-    ) -> None:
-        "Synchronizes a single Markdown page with Confluence."
-        page_path = page_path.resolve(True)
-        if root_dir is None:
-            root_dir = page_path.parent
-        else:
-            root_dir = root_dir.resolve(True)
-        self._synchronize_page(page_path, root_dir, {})
-    def synchronize_directory(
-        self, local_dir: Path, root_dir: Optional[Path] = None
-    ) -> None:
-        "Synchronizes a directory of Markdown pages with Confluence."
-        local_dir = local_dir.resolve(True)
-        if root_dir is None:
-            root_dir = local_dir
-        else:
-            root_dir = root_dir.resolve(True)
-        LOGGER.info("Synchronizing directory: %s", local_dir)
-        # Step 1: build index of all page metadata
-        page_metadata: dict[Path, ConfluencePageMetadata] = {}
-        root_id = (
-            ConfluenceQualifiedID(self.options.root_page_id, self.api.space_key)
-            if self.options.root_page_id
-            else None
-        )
-        self._index_directory(local_dir, root_dir, root_id, page_metadata)
-        LOGGER.info("Indexed %d page(s)", len(page_metadata))
-        # Step 2: convert each page
-        for page_path in page_metadata.keys():
-            self._synchronize_page(page_path, root_dir, page_metadata)
-    def _synchronize_page(
-        self,
-        page_path: Path,
-        root_dir: Path,
-        page_metadata: dict[Path, ConfluencePageMetadata],
-    ) -> None:
-        base_path = page_path.parent
-        LOGGER.info("Synchronizing page: %s", page_path)
-        site_metadata = ConfluenceSiteMetadata(
-            domain=self.api.domain,
-            base_path=self.api.base_path,
-            space_key=self.api.space_key,
-        )
-        document = ConfluenceDocument.create(
-            page_path, self.options, root_dir, site_metadata, page_metadata
-        )
-        self._update_document(document, base_path)
-    def _index_directory(
-        self,
-        local_dir: Path,
-        root_dir: Path,
-        root_id: Optional[ConfluenceQualifiedID],
-        page_metadata: dict[Path, ConfluencePageMetadata],
+        self, api: ConfluenceSession, options: ConfluenceDocumentOptions, root_dir: Path
     ) -> None:
-        "Indexes Markdown files in a directory recursively."
-        LOGGER.info("Indexing directory: %s", local_dir)
-        matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)
-        files: list[Path] = []
-        directories: list[Path] = []
-        for entry in os.scandir(local_dir):
-            if matcher.is_excluded(entry.name, entry.is_dir()):
-                continue
-            if entry.is_file():
-                files.append(Path(local_dir) / entry.name)
-            elif entry.is_dir():
-                directories.append(Path(local_dir) / entry.name)
-        # make page act as parent node in Confluence
-        parent_doc: Optional[Path] = None
-        if (Path(local_dir) / "index.md") in files:
-            parent_doc = Path(local_dir) / "index.md"
-        elif (Path(local_dir) / "README.md") in files:
-            parent_doc = Path(local_dir) / "README.md"
-        elif (Path(local_dir) / f"{local_dir.name}.md") in files:
-            parent_doc = Path(local_dir) / f"{local_dir.name}.md"
-        if parent_doc is None and self.options.keep_hierarchy:
-            parent_doc = Path(local_dir) / "index.md"
-            # create a blank page in Confluence for the directory entry
-            with open(parent_doc, "w"):
-                pass
-        if parent_doc is not None:
-            files.remove(parent_doc)
-            metadata = self._get_or_create_page(parent_doc, root_dir, root_id)
-            LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
-            page_metadata[parent_doc] = metadata
-            parent_id = read_qualified_id(parent_doc) or root_id
-        else:
-            parent_id = root_id
+        """
+        Initializes a new processor instance.
-        for doc in files:
-            metadata = self._get_or_create_page(doc, root_dir, parent_id)
-            LOGGER.debug("Indexed %s with metadata: %s", doc, metadata)
-            page_metadata[doc] = metadata
+        :param api: Holds information about an open session to a Confluence server.
+        :param options: Options that control the generated page content.
+        :param root_dir: File system directory that acts as topmost root node.
+        """
-        for directory in directories:
-            self._index_directory(directory, root_dir, parent_id, page_metadata)
+        super().__init__(options, api.site, root_dir)
+        self.api = api
     def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        root_dir: Path,
-        parent_id: Optional[ConfluenceQualifiedID],
-        *,
-        title: Optional[str] = None,
+        self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
     ) -> ConfluencePageMetadata:
         """
         Creates a new Confluence page if no page is linked in the Markdown document.
         """
         # parse file
-        with open(absolute_path, "r", encoding="utf-8") as f:
-            document = f.read()
-        qualified_id, document = extract_qualified_id(document)
+        document = Scanner().read(absolute_path)
-        if qualified_id is not None:
-            confluence_page = self.api.get_page(qualified_id.page_id)
-        else:
+        overwrite = False
+        if document.page_id is None:
+            # create new Confluence page
             if parent_id is None:
                 raise PageError(
                     f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
                 )
-            # assign title from front-matter if present
-            if title is None:
-                title, _ = extract_frontmatter_title(document)
             # use file name (without extension) and path hash if no title is supplied
-            if title is None:
-                relative_path = absolute_path.relative_to(root_dir)
+            if document.title is not None:
+                title = document.title
+            else:
+                overwrite = True
+                relative_path = absolute_path.relative_to(self.root_dir)
                 hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
                 digest = "".join(f"{c:x}" for c in hash.digest())
                 title = f"{absolute_path.stem} [{digest}]"
             confluence_page = self._create_page(
-                absolute_path, document, title, parent_id
+                absolute_path, document.text, title, parent_id
             )
-        space_key = (
-            self.api.space_id_to_key(confluence_page.space_id)
-            if confluence_page.space_id
-            else self.api.space_key
-        )
+        else:
+            # look up existing Confluence page
+            confluence_page = self.api.get_page(document.page_id)
         return ConfluencePageMetadata(
             page_id=confluence_page.id,
-            space_key=space_key,
-            title=confluence_page.title or "",
+            space_key=self.api.space_id_to_key(confluence_page.space_id),
+            title=confluence_page.title,
+            overwrite=overwrite,
         )
     def _create_page(
@@ -230,13 +94,13 @@ class Application:
         absolute_path: Path,
         document: str,
         title: str,
-        parent_id: ConfluenceQualifiedID,
+        parent_id: ConfluencePageID,
     ) -> ConfluencePage:
-        "Creates a new Confluence page when Markdown file doesn't have an embedded page ID yet."
+        """
+        Creates a new Confluence page when Markdown file doesn't have an embedded page ID yet.
+        """
-        confluence_page = self.api.get_or_create_page(
-            title, parent_id.page_id, space_key=parent_id.space_key
-        )
+        confluence_page = self.api.get_or_create_page(title, parent_id.page_id)
         self._update_markdown(
             absolute_path,
             document,
@@ -245,26 +109,52 @@ class Application:
         )
         return confluence_page
-    def _update_document(self, document: ConfluenceDocument, base_path: Path) -> None:
-        "Saves a new version of a Confluence document."
+    def _save_document(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
+        """
+        Saves a new version of a Confluence document.
+        Invokes Confluence REST API to persist the new version.
+        """
+        base_path = path.parent
         for image in document.images:
             self.api.upload_attachment(
-                document.id.page_id,
+                page_id.page_id,
                 attachment_name(image),
                 attachment_path=base_path / image,
             )
         for name, data in document.embedded_images.items():
             self.api.upload_attachment(
-                document.id.page_id,
+                page_id.page_id,
                 name,
                 raw_data=data,
             )
         content = document.xhtml()
         LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
-        self.api.update_page(document.id.page_id, content, title=document.title)
+        title = None
+        if document.title is not None:
+            meta = self.page_metadata[path]
+            # update title only for pages with randomly assigned title
+            if meta.overwrite:
+                conflicting_page_id = self.api.page_exists(
+                    document.title, space_id=self.api.space_key_to_id(meta.space_key)
+                )
+                if conflicting_page_id is None:
+                    title = document.title
+                else:
+                    LOGGER.info(
+                        "Document title of %s conflicts with Confluence page title of %s",
+                        path,
+                        conflicting_page_id,
+                    )
+        self.api.update_page(page_id.page_id, content, title=title)
     def _update_markdown(
         self,
@@ -273,7 +163,9 @@ class Application:
         page_id: str,
         space_key: Optional[str],
     ) -> None:
-        "Writes the Confluence page ID and space key at the beginning of the Markdown file."
+        """
+        Writes the Confluence page ID and space key at the beginning of the Markdown file.
+        """
         content: list[str] = []
@@ -293,3 +185,29 @@ class Application:
         with open(path, "w", encoding="utf-8") as file:
             file.write("\n".join(content))
+class SynchronizingProcessorFactory(ProcessorFactory):
+    api: ConfluenceSession
+    def __init__(
+        self, api: ConfluenceSession, options: ConfluenceDocumentOptions
+    ) -> None:
+        super().__init__(options, api.site)
+        self.api = api
+    def create(self, root_dir: Path) -> Processor:
+        return SynchronizingProcessor(self.api, self.options, root_dir)
+class Application(Converter):
+    """
+    The entry point for Markdown to Confluence conversion.
+    This is the class instantiated by the command-line application.
+    """
+    def __init__(
+        self, api: ConfluenceSession, options: ConfluenceDocumentOptions
+    ) -> None:
+        super().__init__(SynchronizingProcessorFactory(api, options))

md2conf/converter.py CHANGED Viewed

@@ -18,15 +18,16 @@ import xml.etree.ElementTree
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
-from urllib.parse import ParseResult, urlparse, urlunparse
+from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
 import lxml.etree as ET
 import markdown
-import yaml
 from lxml.builder import ElementMaker
 from .mermaid import render_diagram
+from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
 from .properties import PageError
+from .scanner import ScannedDocument, Scanner
 namespaces = {
     "ac": "http://atlassian.com/content",
@@ -65,6 +66,19 @@ def is_relative_url(url: str) -> bool:
     return not bool(urlparts.scheme) and not bool(urlparts.netloc)
+def encode_title(text: str) -> str:
+    "Converts a title string such that it is safe to embed into a Confluence URL."
+    # replace unsafe characters with space
+    text = re.sub(r"[^A-Za-z0-9._~()'!*:@,;+?-]+", " ", text)
+    # replace multiple consecutive spaces with single space
+    text = re.sub(r"\s\s+", " ", text)
+    # URL-encode
+    return quote_plus(text.strip())
 def emoji_generator(
     index: str,
     shortname: str,
@@ -142,8 +156,8 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
     try:
         return ET.fromstringlist(data, parser=parser)
-    except ET.XMLSyntaxError as e:
-        raise ParseError(e)
+    except ET.XMLSyntaxError as ex:
+        raise ParseError() from ex
 def elements_from_strings(items: list[str]) -> ET._Element:
@@ -240,20 +254,6 @@ _languages = [
 ]
-@dataclass
-class ConfluenceSiteMetadata:
-    domain: str
-    base_path: str
-    space_key: Optional[str]
-@dataclass
-class ConfluencePageMetadata:
-    page_id: str
-    space_key: Optional[str]
-    title: str
 class NodeVisitor:
     def visit(self, node: ET._Element) -> None:
         "Recursively visits all descendants of this node."
@@ -479,7 +479,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
                     "Confluence space key required for building full web URLs"
                 )
-            page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{link_metadata.title}"
+            page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
         components = ParseResult(
             scheme="https",
@@ -962,70 +962,15 @@ class DocumentError(RuntimeError):
     "Raised when a converted Markdown document has an unexpected element or attribute."
-def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
-    values: list[str] = []
-    def _repl_func(matchobj: re.Match) -> str:
-        values.append(matchobj.group(1))
-        return ""
-    text = re.sub(pattern, _repl_func, text, 1, re.ASCII)
-    value = values[0] if values else None
-    return value, text
+@dataclass
+class ConfluencePageID:
+    page_id: str
 @dataclass
 class ConfluenceQualifiedID:
     page_id: str
-    space_key: Optional[str] = None
-    def __init__(self, page_id: str, space_key: Optional[str] = None):
-        self.page_id = page_id
-        self.space_key = space_key
-def extract_qualified_id(text: str) -> tuple[Optional[ConfluenceQualifiedID], str]:
-    "Extracts the Confluence page ID and space key from a Markdown document."
-    page_id, text = extract_value(r"<!--\s+confluence-page-id:\s*(\d+)\s+-->", text)
-    if page_id is None:
-        return None, text
-    # extract Confluence space key
-    space_key, text = extract_value(r"<!--\s+confluence-space-key:\s*(\S+)\s+-->", text)
-    return ConfluenceQualifiedID(page_id, space_key), text
-def extract_frontmatter(text: str) -> tuple[Optional[str], str]:
-    "Extracts the front matter from a Markdown document."
-    return extract_value(r"(?ms)\A---$(.+?)^---$", text)
-def extract_frontmatter_title(text: str) -> tuple[Optional[str], str]:
-    frontmatter, text = extract_frontmatter(text)
-    title: Optional[str] = None
-    if frontmatter is not None:
-        properties = yaml.safe_load(frontmatter)
-        if isinstance(properties, dict):
-            property_title = properties.get("title")
-            if isinstance(property_title, str):
-                title = property_title
-    return title, text
-def read_qualified_id(absolute_path: Path) -> Optional[ConfluenceQualifiedID]:
-    "Reads the Confluence page ID and space key from a Markdown document."
-    with open(absolute_path, "r", encoding="utf-8") as f:
-        document = f.read()
-    qualified_id, _ = extract_qualified_id(document)
-    return qualified_id
+    space_key: str
 @dataclass
@@ -1048,15 +993,18 @@ class ConfluenceDocumentOptions:
     ignore_invalid_url: bool = False
     heading_anchors: bool = False
     generated_by: Optional[str] = "This page has been generated with a tool."
-    root_page_id: Optional[str] = None
+    root_page_id: Optional[ConfluencePageID] = None
     keep_hierarchy: bool = False
     render_mermaid: bool = False
     diagram_output_format: Literal["png", "svg"] = "png"
     webui_links: bool = False
+class ConversionError(RuntimeError):
+    "Raised when a Markdown document cannot be converted to Confluence Storage Format."
 class ConfluenceDocument:
-    id: ConfluenceQualifiedID
     title: Optional[str]
     links: list[str]
     images: list[Path]
@@ -1072,67 +1020,61 @@ class ConfluenceDocument:
         root_dir: Path,
         site_metadata: ConfluenceSiteMetadata,
         page_metadata: dict[Path, ConfluencePageMetadata],
-    ) -> "ConfluenceDocument":
+    ) -> tuple[ConfluencePageID, "ConfluenceDocument"]:
         path = path.resolve(True)
-        with open(path, "r", encoding="utf-8") as f:
-            text = f.read()
+        document = Scanner().read(path)
-        # extract Confluence page ID
-        qualified_id, text = extract_qualified_id(text)
-        if qualified_id is None:
+        if document.page_id is not None:
+            page_id = ConfluencePageID(document.page_id)
+        else:
             # look up Confluence page ID in metadata
             metadata = page_metadata.get(path)
             if metadata is not None:
-                qualified_id = ConfluenceQualifiedID(
-                    metadata.page_id, metadata.space_key
-                )
-        if qualified_id is None:
-            raise PageError("missing Confluence page ID")
+                page_id = ConfluencePageID(metadata.page_id)
+            else:
+                raise PageError("missing Confluence page ID")
-        return ConfluenceDocument(
-            path, text, qualified_id, options, root_dir, site_metadata, page_metadata
+        return page_id, ConfluenceDocument(
+            path, document, options, root_dir, site_metadata, page_metadata
         )
     def __init__(
         self,
         path: Path,
-        text: str,
-        qualified_id: ConfluenceQualifiedID,
+        document: ScannedDocument,
         options: ConfluenceDocumentOptions,
         root_dir: Path,
         site_metadata: ConfluenceSiteMetadata,
         page_metadata: dict[Path, ConfluencePageMetadata],
     ) -> None:
         self.options = options
-        self.id = qualified_id
-        # extract 'generated-by' tag text
-        generated_by_tag, text = extract_value(
-            r"<!--\s+generated-by:\s*(.*)\s+-->", text
-        )
-        # extract frontmatter
-        self.title, text = extract_frontmatter_title(text)
         # convert to HTML
-        html = markdown_to_html(text)
+        html = markdown_to_html(document.text)
         # parse Markdown document
         if self.options.generated_by is not None:
-            generated_by = self.options.generated_by
-            if generated_by_tag is not None:
-                generated_by = generated_by_tag
+            generated_by = document.generated_by or self.options.generated_by
+        else:
+            generated_by = None
+        if generated_by is not None:
+            generated_by_html = markdown_to_html(generated_by)
             content = [
                 '<ac:structured-macro ac:name="info" ac:schema-version="1">',
-                f"<ac:rich-text-body><p>{generated_by}</p></ac:rich-text-body>",
+                f"<ac:rich-text-body>{generated_by_html}</ac:rich-text-body>",
                 "</ac:structured-macro>",
                 html,
             ]
         else:
             content = [html]
-        self.root = elements_from_strings(content)
+        try:
+            self.root = elements_from_strings(content)
+        except ParseError as ex:
+            raise ConversionError(path) from ex
         converter = ConfluenceStorageFormatConverter(
             ConfluenceConverterOptions(
@@ -1152,8 +1094,7 @@ class ConfluenceDocument:
         self.images = converter.images
         self.embedded_images = converter.embedded_images
-        if self.title is None:
-            self.title = converter.toc.get_title()
+        self.title = document.title or converter.toc.get_title()
     def xhtml(self) -> str:
         return elements_to_string(self.root)

markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl