markdown-to-confluence 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/local.py CHANGED
@@ -6,22 +6,15 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
 
-import hashlib
 import logging
 import os
 from pathlib import Path
 from typing import Optional
 
-from .converter import (
-    ConfluenceDocument,
-    ConfluenceDocumentOptions,
-    ConfluencePageID,
-    ConfluenceQualifiedID,
-    extract_qualified_id,
-)
+from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
+from .extra import override
 from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
-from .processor import Converter, Processor, ProcessorFactory
-from .properties import PageError
+from .processor import Converter, DocumentNode, Processor, ProcessorFactory
 
 LOGGER = logging.getLogger(__name__)
 
@@ -51,46 +44,41 @@ class LocalProcessor(Processor):
         super().__init__(options, site, root_dir)
         self.out_dir = out_dir or root_dir
 
-    def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        parent_id: Optional[ConfluencePageID],
-        *,
-        title: Optional[str] = None,
-    ) -> ConfluencePageMetadata:
-        """
-        Extracts metadata from a Markdown file.
+    @override
+    def _synchronize_tree(
+        self, root: DocumentNode, root_id: Optional[ConfluencePageID]
+    ) -> None:
         """
+        Creates the cross-reference index.
 
-        # parse file
-        with open(absolute_path, "r", encoding="utf-8") as f:
-            text = f.read()
-
-        qualified_id, text = extract_qualified_id(text)
+        Does not change Markdown files.
+        """
 
-        if qualified_id is None:
-            if parent_id is None:
-                raise PageError(
-                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
+        for node in root.all():
+            if node.page_id is not None:
+                page_id = node.page_id
+            else:
+                digest = self._generate_hash(node.absolute_path)
+                LOGGER.info(
+                    "Identifier %s assigned to page: %s", digest, node.absolute_path
                 )
-
-            hash = hashlib.md5(text.encode("utf-8"))
-            digest = "".join(f"{c:x}" for c in hash.digest())
-            LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
-            qualified_id = ConfluenceQualifiedID(digest)
-
-        return ConfluencePageMetadata(
-            page_id=qualified_id.page_id,
-            space_key=qualified_id.space_key,
-            title="",
-            overwrite=True,
-        )
-
-    def _save_document(self, document: ConfluenceDocument, path: Path) -> None:
+                page_id = digest
+
+            self.page_metadata.add(
+                node.absolute_path,
+                ConfluencePageMetadata(
+                    page_id=page_id,
+                    space_key=node.space_key or self.site.space_key or "HOME",
+                    title=node.title or "",
+                ),
+            )
+
+    @override
+    def _update_page(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
         """
-        Saves a new version of a Confluence document.
-
-        A derived class may invoke Confluence REST API to persist the new version.
+        Saves the document as Confluence Storage Format XHTML to the local disk.
         """
 
         content = document.xhtml()
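
Taken together, `LocalProcessor._synchronize_tree` guarantees that every indexed document ends up with a usable page ID and space key even when the Markdown file declares neither: the ID falls back to a path-derived digest, and the space key falls back from the document to the site to the literal `"HOME"`. A standalone sketch of that fallback logic (the function name and sample values below are illustrative, not part of the md2conf API):

```python
from typing import Optional


def resolve_page(
    page_id: Optional[str],
    space_key: Optional[str],
    site_space_key: Optional[str],
    digest: str,
) -> tuple[str, str]:
    # Prefer the page ID embedded in the document; otherwise use the digest
    # that _generate_hash derives from the document's relative path.
    resolved_id = page_id if page_id is not None else digest
    # Space key falls back from document to site to "HOME".
    resolved_space = space_key or site_space_key or "HOME"
    return resolved_id, resolved_space


print(resolve_page(None, None, "DOCS", "1a2b3c"))     # ('1a2b3c', 'DOCS')
print(resolve_page("987654", "ENG", None, "1a2b3c"))  # ('987654', 'ENG')
```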
md2conf/matcher.py CHANGED
@@ -10,15 +10,15 @@ import os.path
 from dataclasses import dataclass
 from fnmatch import fnmatch
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Iterable, Optional, Union, overload
 
 
-@dataclass
+@dataclass(frozen=True)
 class Entry:
     """
     Represents a file or directory entry.
 
-    :param name: Name of the file-system entry.
+    :param name: Name of the file-system entry to match against the rule-set.
     :param is_dir: True if the entry is a directory.
     """
 
@@ -43,6 +43,15 @@ class MatcherOptions:
             self.extension = f".{self.extension}"
 
 
+def _entry_name_dir(entry: Union[Entry, os.DirEntry[str]]) -> tuple[str, bool]:
+    if isinstance(entry, Entry):
+        return entry.name, entry.is_dir
+    elif isinstance(entry, os.DirEntry):
+        return entry.name, entry.is_dir()
+    else:
+        raise NotImplementedError("type match not exhaustive")
+
+
 class Matcher:
     "Compares file and directory names against a list of exclude/include patterns."
 
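
The helper exists because the two entry types expose the directory flag differently: the frozen `Entry` dataclass carries `is_dir` as a field, while `os.DirEntry` exposes it as a method. A self-contained sketch of the same normalization (the `Entry` class is re-declared here only so the snippet runs on its own):

```python
import os
from dataclasses import dataclass
from typing import Union


@dataclass(frozen=True)
class Entry:
    name: str
    is_dir: bool


def entry_name_dir(entry: Union[Entry, os.DirEntry]) -> tuple[str, bool]:
    # Entry stores is_dir as an attribute; os.DirEntry computes it when called.
    if isinstance(entry, Entry):
        return entry.name, entry.is_dir
    return entry.name, entry.is_dir()


print(entry_name_dir(Entry("notes.md", False)))
for dir_entry in os.scandir("."):
    print(entry_name_dir(dir_entry))
```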
@@ -58,20 +67,40 @@ class Matcher:
         else:
             self.rules = []
 
+        for rule in self.rules:
+            if "/" in rule or os.path.sep in rule:
+                raise ValueError(f"nested matching not supported: {rule}")
+
     def extension_matches(self, name: str) -> bool:
         "True if the file name has the expected extension."
 
         return self.options.extension is None or name.endswith(self.options.extension)
 
-    def is_excluded(self, name: str, is_dir: bool) -> bool:
+    @overload
+    def is_excluded(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches any of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name matches at least one of the exclusion patterns.
+        """
+
+        ...
+
+    @overload
+    def is_excluded(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches any of the exclusion patterns.
 
-        :param name: Name to match against the rule-set.
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name matches at least one of the exclusion patterns.
         """
 
+        ...
+
+    def is_excluded(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        name, is_dir = _entry_name_dir(entry)
+
         # skip hidden files and directories
         if name.startswith("."):
             return True
@@ -86,26 +115,38 @@ class Matcher:
         else:
             return False
 
-    def is_included(self, name: str, is_dir: bool) -> bool:
+    @overload
+    def is_included(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches none of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name doesn't match any of the exclusion patterns.
+        """
+        ...
+
+    @overload
+    def is_included(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches none of the exclusion patterns.
 
-        :param name: Name to match against the rule-set.
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name doesn't match any of the exclusion patterns.
         """
+        ...
 
-        return not self.is_excluded(name, is_dir)
+    def is_included(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        return not self.is_excluded(entry)
 
-    def filter(self, items: Iterable[Entry]) -> list[Entry]:
+    def filter(self, entries: Iterable[Entry]) -> list[Entry]:
         """
         Returns only those elements from the input that don't match any of the exclusion rules.
 
-        :param items: A list of names to filter.
+        :param entries: A list of names to filter.
         :returns: A filtered list of names that didn't match any of the exclusion rules.
         """
 
-        return [item for item in items if self.is_included(item.name, item.is_dir)]
+        return [entry for entry in entries if self.is_included(entry)]
 
     def scandir(self, path: Path) -> list[Entry]:
         """
md2conf/mermaid.py CHANGED
@@ -79,10 +79,16 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") ->
     )
     stdout, stderr = proc.communicate(input=source.encode("utf-8"))
     if proc.returncode:
-        raise RuntimeError(
-            f"failed to convert Mermaid diagram; exit code: {proc.returncode}, "
-            f"output:\n{stdout.decode('utf-8')}\n{stderr.decode('utf-8')}"
-        )
+        messages = [
+            f"failed to convert Mermaid diagram; exit code: {proc.returncode}"
+        ]
+        console_output = stdout.decode("utf-8")
+        if console_output:
+            messages.append(f"output:\n{console_output}")
+        console_error = stderr.decode("utf-8")
+        if console_error:
+            messages.append(f"error:\n{console_error}")
+        raise RuntimeError("\n".join(messages))
     with open(filename, "rb") as image:
         return image.read()
 
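
The rewritten error path reports only the streams that actually contain text instead of always interpolating both. A runnable sketch of the same message assembly, substituting a short Python child process for the Mermaid CLI:

```python
import subprocess
import sys

# Stand-in for the Mermaid CLI: a child process that fails with text on stderr only.
proc = subprocess.Popen(
    [sys.executable, "-c", "import sys; sys.stderr.write('syntax error'); sys.exit(2)"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
stdout, stderr = proc.communicate(input=b"graph TD; A-->B")
if proc.returncode:
    messages = [f"failed to convert diagram; exit code: {proc.returncode}"]
    if stdout:
        messages.append(f"output:\n{stdout.decode('utf-8')}")
    if stderr:
        messages.append(f"error:\n{stderr.decode('utf-8')}")
    print("\n".join(messages))  # render_diagram raises RuntimeError with this text
```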
md2conf/metadata.py CHANGED
@@ -33,10 +33,8 @@ class ConfluencePageMetadata:
     :param page_id: Confluence page ID.
     :param space_key: Confluence space key.
     :param title: Document title.
-    :param overwrite: True if operations are allowed to update document properties (e.g. title).
     """
 
     page_id: str
-    space_key: Optional[str]
+    space_key: str
    title: str
-    overwrite: bool
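
With `overwrite` dropped and `space_key` narrowed to `str`, every metadata record now carries a concrete space key (the local processor fills it with `node.space_key or self.site.space_key or "HOME"`). An illustrative construction with placeholder values:

```python
from md2conf.metadata import ConfluencePageMetadata

# Placeholder values; in 0.4.0 all three fields are plain strings.
metadata = ConfluencePageMetadata(page_id="123456", space_key="DOCS", title="Setup guide")
print(metadata)
```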
md2conf/processor.py CHANGED
@@ -6,20 +6,68 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
 
+import hashlib
 import logging
 import os
 from abc import abstractmethod
 from pathlib import Path
-from typing import Optional
+from typing import Iterable, Optional
 
+from .collection import ConfluencePageCollection
 from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
 from .matcher import Matcher, MatcherOptions
-from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
+from .metadata import ConfluenceSiteMetadata
 from .properties import ArgumentError
+from .scanner import Scanner
 
 LOGGER = logging.getLogger(__name__)
 
 
+class DocumentNode:
+    absolute_path: Path
+    page_id: Optional[str]
+    space_key: Optional[str]
+    title: Optional[str]
+
+    _children: list["DocumentNode"]
+
+    def __init__(
+        self,
+        absolute_path: Path,
+        page_id: Optional[str],
+        space_key: Optional[str] = None,
+        title: Optional[str] = None,
+    ):
+        self.absolute_path = absolute_path
+        self.page_id = page_id
+        self.space_key = space_key
+        self.title = title
+        self._children = []
+
+    def count(self) -> int:
+        c = len(self._children)
+        for child in self._children:
+            c += child.count()
+        return c
+
+    def add_child(self, child: "DocumentNode") -> None:
+        self._children.append(child)
+
+    def children(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+
+    def descendants(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+            yield from child.descendants()
+
+    def all(self) -> Iterable["DocumentNode"]:
+        yield self
+        for child in self._children:
+            yield from child.all()
+
+
 class Processor:
     """
     Processes a single Markdown page or a directory of Markdown pages.
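
`DocumentNode` is a plain tree whose traversal helpers differ slightly: `count()` tallies descendants only, while `all()` also yields the node itself. A small sketch of building and walking such a tree (paths, titles and page IDs below are made up):

```python
from pathlib import Path

from md2conf.processor import DocumentNode  # introduced in 0.4.0

root = DocumentNode(Path("docs/index.md"), page_id="100001", title="Home")
root.add_child(DocumentNode(Path("docs/setup.md"), page_id=None, title="Setup"))
root.add_child(DocumentNode(Path("docs/usage.md"), page_id="100002", title="Usage"))

print(root.count())                           # 2 -- descendants only
print([n.title for n in root.all()])          # ['Home', 'Setup', 'Usage']
print([n.title for n in root.descendants()])  # ['Setup', 'Usage']
```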
@@ -29,7 +77,7 @@ class Processor:
     site: ConfluenceSiteMetadata
     root_dir: Path
 
-    page_metadata: dict[Path, ConfluencePageMetadata]
+    page_metadata: ConfluencePageCollection
 
     def __init__(
         self,
@@ -40,8 +88,7 @@ class Processor:
         self.options = options
         self.site = site
         self.root_dir = root_dir
-
-        self.page_metadata = {}
+        self.page_metadata = ConfluencePageCollection()
 
     def process_directory(self, local_dir: Path) -> None:
         """
@@ -51,13 +98,16 @@ class Processor:
         local_dir = local_dir.resolve(True)
         LOGGER.info("Processing directory: %s", local_dir)
 
-        # Step 1: build index of all page metadata
-        self._index_directory(local_dir, self.options.root_page_id)
-        LOGGER.info("Indexed %d page(s)", len(self.page_metadata))
+        # Step 1: build index of all Markdown files in directory hierarchy
+        root = self._index_directory(local_dir, None)
+        LOGGER.info("Indexed %d document(s)", root.count())
 
-        # Step 2: convert each page
-        for page_path in self.page_metadata.keys():
-            self._process_page(page_path)
+        # Step 2: synchronize directory tree structure with page hierarchy in space
+        self._synchronize_tree(root, self.options.root_page_id)
+
+        # Step 3: synchronize files in directory hierarchy with pages in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
 
     def process_page(self, path: Path) -> None:
         """
@@ -65,34 +115,52 @@ class Processor:
         """
 
         LOGGER.info("Processing page: %s", path)
-        self._index_page(path, self.options.root_page_id)
-        self._process_page(path)
 
-    def _process_page(self, path: Path) -> None:
-        document = ConfluenceDocument.create(
+        # Step 1: parse Markdown file
+        root = self._index_file(path)
+
+        # Step 2: find matching page in Confluence
+        self._synchronize_tree(root, self.options.root_page_id)
+
+        # Step 3: synchronize document with page in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
+
+    def _synchronize_page(self, path: Path, page_id: ConfluencePageID) -> None:
+        """
+        Synchronizes a single Markdown document with its corresponding Confluence page.
+        """
+
+        page_id, document = ConfluenceDocument.create(
             path, self.options, self.root_dir, self.site, self.page_metadata
         )
-        self._save_document(document, path)
+        self._update_page(page_id, document, path)
 
     @abstractmethod
-    def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        parent_id: Optional[ConfluencePageID],
-        *,
-        title: Optional[str] = None,
-    ) -> ConfluencePageMetadata:
+    def _synchronize_tree(
+        self, node: DocumentNode, page_id: Optional[ConfluencePageID]
+    ) -> None:
         """
-        Creates a new Confluence page if no page is linked in the Markdown document.
+        Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
+
+        Creates new Confluence pages as necessary, e.g. if no page is linked in the Markdown document, or no page is found with lookup by page title.
+
+        May update the original Markdown document to add tags to associate the document with its corresponding Confluence page.
         """
         ...
 
     @abstractmethod
-    def _save_document(self, document: ConfluenceDocument, path: Path) -> None: ...
+    def _update_page(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
+        """
+        Saves the document as Confluence Storage Format XHTML.
+        """
+        ...
 
     def _index_directory(
-        self, local_dir: Path, parent_id: Optional[ConfluencePageID]
-    ) -> None:
+        self, local_dir: Path, parent: Optional[DocumentNode]
+    ) -> DocumentNode:
         """
         Indexes Markdown files in a directory hierarchy recursively.
         """
@@ -104,7 +172,7 @@ class Processor:
         files: list[Path] = []
         directories: list[Path] = []
         for entry in os.scandir(local_dir):
-            if matcher.is_excluded(entry.name, entry.is_dir()):
+            if matcher.is_excluded(entry):
                 continue
 
             if entry.is_file():
@@ -132,28 +200,54 @@ class Processor:
         if parent_doc in files:
             files.remove(parent_doc)
 
-            # use latest parent as parent for index page
-            metadata = self._get_or_create_page(parent_doc, parent_id)
-            LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
-            self.page_metadata[parent_doc] = metadata
-
-            # assign new index page as new parent
-            parent_id = ConfluencePageID(metadata.page_id)
-
-        for doc in files:
-            self._index_page(doc, parent_id)
+            # promote Markdown document in directory as parent page in Confluence
+            node = self._index_file(parent_doc)
+            if parent is not None:
+                parent.add_child(node)
+            parent = node
+        elif parent is None:
+            # create new top-level node
+            if self.options.root_page_id is not None:
+                page_id = self.options.root_page_id.page_id
+                parent = DocumentNode(local_dir, page_id=page_id)
+            else:
+                # local use only, raises error with remote synchronization
+                parent = DocumentNode(local_dir, page_id=None)
+
+        for file in files:
+            node = self._index_file(file)
+            parent.add_child(node)
 
         for directory in directories:
-            self._index_directory(directory, parent_id)
+            self._index_directory(directory, parent)
+
+        return parent
 
-    def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
+    def _index_file(self, path: Path) -> DocumentNode:
         """
         Indexes a single Markdown file.
         """
 
-        metadata = self._get_or_create_page(path, parent_id)
-        LOGGER.debug("Indexed %s with metadata: %s", path, metadata)
-        self.page_metadata[path] = metadata
+        LOGGER.info("Indexing file: %s", path)
+
+        # extract information from a Markdown document found in a local directory.
+        document = Scanner().read(path)
+
+        return DocumentNode(
+            absolute_path=path,
+            page_id=document.page_id,
+            space_key=document.space_key,
+            title=document.title,
+        )
+
+    def _generate_hash(self, absolute_path: Path) -> str:
+        """
+        Computes a digest to be used as a unique string.
+        """
+
+        relative_path = absolute_path.relative_to(self.root_dir)
+        hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
+        return "".join(f"{c:x}" for c in hash.digest())
 
 
 class ProcessorFactory:
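
When a document carries no page ID, the digest produced by `_generate_hash` stands in for one; it depends only on the document's path relative to the content root, so it is stable across runs over the same tree. An equivalent standalone computation (the paths are illustrative):

```python
import hashlib
from pathlib import Path


def generate_hash(absolute_path: Path, root_dir: Path) -> str:
    # Hash the POSIX form of the path relative to the content root and render
    # each byte as lowercase hexadecimal, mirroring Processor._generate_hash.
    relative_path = absolute_path.relative_to(root_dir)
    digest = hashlib.md5(relative_path.as_posix().encode("utf-8"))
    return "".join(f"{c:x}" for c in digest.digest())


print(generate_hash(Path("/work/docs/setup.md"), Path("/work/docs")))
```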
md2conf/properties.py CHANGED
@@ -54,15 +54,28 @@ class ConfluenceSiteProperties:
         self.space_key = opt_space_key
 
 
-class ConfluenceConnectionProperties(ConfluenceSiteProperties):
-    "Properties related to connecting to Confluence."
-
+class ConfluenceConnectionProperties:
+    """
+    Properties related to connecting to Confluence.
+
+    :param api_url: Confluence API URL. Required for scoped tokens.
+    :param user_name: Confluence user name.
+    :param api_key: Confluence API key.
+    :param headers: Additional HTTP headers to pass to Confluence REST API calls.
+    """
+
+    domain: Optional[str]
+    base_path: Optional[str]
+    space_key: Optional[str]
+    api_url: Optional[str]
     user_name: Optional[str]
     api_key: str
     headers: Optional[dict[str, str]]
 
     def __init__(
         self,
+        *,
+        api_url: Optional[str] = None,
         domain: Optional[str] = None,
         base_path: Optional[str] = None,
         user_name: Optional[str] = None,
@@ -70,14 +83,20 @@ class ConfluenceConnectionProperties(ConfluenceSiteProperties):
         space_key: Optional[str] = None,
         headers: Optional[dict[str, str]] = None,
     ) -> None:
-        super().__init__(domain, base_path, space_key)
-
+        opt_api_url = api_url or os.getenv("CONFLUENCE_API_URL")
+        opt_domain = domain or os.getenv("CONFLUENCE_DOMAIN")
+        opt_base_path = base_path or os.getenv("CONFLUENCE_PATH")
+        opt_space_key = space_key or os.getenv("CONFLUENCE_SPACE_KEY")
         opt_user_name = user_name or os.getenv("CONFLUENCE_USER_NAME")
         opt_api_key = api_key or os.getenv("CONFLUENCE_API_KEY")
 
         if not opt_api_key:
             raise ArgumentError("Confluence API key not specified")
 
+        self.api_url = opt_api_url
+        self.domain = opt_domain
+        self.base_path = opt_base_path
+        self.space_key = opt_space_key
         self.user_name = opt_user_name
         self.api_key = opt_api_key
         self.headers = headers
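
`ConfluenceConnectionProperties` now takes keyword-only arguments, stores the site settings directly rather than inheriting them from `ConfluenceSiteProperties`, and resolves each omitted argument from an environment variable. A sketch of configuring it purely via the environment; all values are placeholders, and any validation outside the hunks shown above is not accounted for:

```python
import os

from md2conf.properties import ConfluenceConnectionProperties

# Placeholder values for the environment variables the constructor falls back to.
os.environ["CONFLUENCE_DOMAIN"] = "example.atlassian.net"
os.environ["CONFLUENCE_PATH"] = "/wiki/"
os.environ["CONFLUENCE_SPACE_KEY"] = "DOCS"
os.environ["CONFLUENCE_USER_NAME"] = "docs-bot@example.com"
os.environ["CONFLUENCE_API_KEY"] = "api-token-placeholder"

properties = ConfluenceConnectionProperties()  # arguments are keyword-only in 0.4.0
print(properties.domain, properties.base_path, properties.space_key)
```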