PyPI - markdown-to-confluence - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

markdown-to-confluence 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/METADATA +118 -7
markdown_to_confluence-0.4.0.dist-info/RECORD +25 -0
md2conf/__init__.py +1 -1
md2conf/__main__.py +18 -7
md2conf/api.py +372 -186
md2conf/application.py +82 -70
md2conf/collection.py +31 -0
md2conf/converter.py +17 -10
md2conf/emoji.py +28 -3
md2conf/extra.py +14 -0
md2conf/local.py +30 -35
md2conf/metadata.py +0 -2
md2conf/processor.py +134 -38
md2conf/properties.py +24 -5
md2conf/scanner.py +53 -21
markdown_to_confluence-0.3.5.dist-info/RECORD +0 -23
{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/WHEEL +0 -0
{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/zip-safe +0 -0

md2conf/processor.py CHANGED Viewed

@@ -6,20 +6,68 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
+import hashlib
 import logging
 import os
 from abc import abstractmethod
 from pathlib import Path
-from typing import Optional
+from typing import Iterable, Optional
+from .collection import ConfluencePageCollection
 from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
 from .matcher import Matcher, MatcherOptions
-from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
+from .metadata import ConfluenceSiteMetadata
 from .properties import ArgumentError
+from .scanner import Scanner
 LOGGER = logging.getLogger(__name__)
+class DocumentNode:
+    absolute_path: Path
+    page_id: Optional[str]
+    space_key: Optional[str]
+    title: Optional[str]
+    _children: list["DocumentNode"]
+    def __init__(
+        self,
+        absolute_path: Path,
+        page_id: Optional[str],
+        space_key: Optional[str] = None,
+        title: Optional[str] = None,
+    ):
+        self.absolute_path = absolute_path
+        self.page_id = page_id
+        self.space_key = space_key
+        self.title = title
+        self._children = []
+    def count(self) -> int:
+        c = len(self._children)
+        for child in self._children:
+            c += child.count()
+        return c
+    def add_child(self, child: "DocumentNode") -> None:
+        self._children.append(child)
+    def children(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+    def descendants(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+            yield from child.descendants()
+    def all(self) -> Iterable["DocumentNode"]:
+        yield self
+        for child in self._children:
+            yield from child.all()
 class Processor:
     """
     Processes a single Markdown page or a directory of Markdown pages.
@@ -29,7 +77,7 @@ class Processor:
     site: ConfluenceSiteMetadata
     root_dir: Path
-    page_metadata: dict[Path, ConfluencePageMetadata]
+    page_metadata: ConfluencePageCollection
     def __init__(
         self,
@@ -40,8 +88,7 @@ class Processor:
         self.options = options
         self.site = site
         self.root_dir = root_dir
-        self.page_metadata = {}
+        self.page_metadata = ConfluencePageCollection()
     def process_directory(self, local_dir: Path) -> None:
         """
@@ -51,13 +98,16 @@ class Processor:
         local_dir = local_dir.resolve(True)
         LOGGER.info("Processing directory: %s", local_dir)
-        # Step 1: build index of all page metadata
-        self._index_directory(local_dir, self.options.root_page_id)
-        LOGGER.info("Indexed %d page(s)", len(self.page_metadata))
+        # Step 1: build index of all Markdown files in directory hierarchy
+        root = self._index_directory(local_dir, None)
+        LOGGER.info("Indexed %d document(s)", root.count())
+        # Step 2: synchronize directory tree structure with page hierarchy in space
+        self._synchronize_tree(root, self.options.root_page_id)
-        # Step 2: convert each page
-        for page_path in self.page_metadata.keys():
-            self._process_page(page_path)
+        # Step 3: synchronize files in directory hierarchy with pages in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
     def process_page(self, path: Path) -> None:
         """
@@ -65,32 +115,52 @@ class Processor:
         """
         LOGGER.info("Processing page: %s", path)
-        self._index_page(path, self.options.root_page_id)
-        self._process_page(path)
-    def _process_page(self, path: Path) -> None:
+        # Step 1: parse Markdown file
+        root = self._index_file(path)
+        # Step 2: find matching page in Confluence
+        self._synchronize_tree(root, self.options.root_page_id)
+        # Step 3: synchronize document with page in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
+    def _synchronize_page(self, path: Path, page_id: ConfluencePageID) -> None:
+        """
+        Synchronizes a single Markdown document with its corresponding Confluence page.
+        """
         page_id, document = ConfluenceDocument.create(
             path, self.options, self.root_dir, self.site, self.page_metadata
         )
-        self._save_document(page_id, document, path)
+        self._update_page(page_id, document, path)
     @abstractmethod
-    def _get_or_create_page(
-        self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
-    ) -> ConfluencePageMetadata:
+    def _synchronize_tree(
+        self, node: DocumentNode, page_id: Optional[ConfluencePageID]
+    ) -> None:
         """
-        Creates a new Confluence page if no page is linked in the Markdown document.
+        Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
+        Creates new Confluence pages as necessary, e.g. if no page is linked in the Markdown document, or no page is found with lookup by page title.
+        May update the original Markdown document to add tags to associate the document with its corresponding Confluence page.
         """
         ...
     @abstractmethod
-    def _save_document(
+    def _update_page(
         self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
-    ) -> None: ...
+    ) -> None:
+        """
+        Saves the document as Confluence Storage Format XHTML.
+        """
+        ...
     def _index_directory(
-        self, local_dir: Path, parent_id: Optional[ConfluencePageID]
-    ) -> None:
+        self, local_dir: Path, parent: Optional[DocumentNode]
+    ) -> DocumentNode:
         """
         Indexes Markdown files in a directory hierarchy recursively.
         """
@@ -130,28 +200,54 @@ class Processor:
             if parent_doc in files:
                 files.remove(parent_doc)
-            # use latest parent as parent for index page
-            metadata = self._get_or_create_page(parent_doc, parent_id)
-            LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
-            self.page_metadata[parent_doc] = metadata
-            # assign new index page as new parent
-            parent_id = ConfluencePageID(metadata.page_id)
-        for doc in files:
-            self._index_page(doc, parent_id)
+            # promote Markdown document in directory as parent page in Confluence
+            node = self._index_file(parent_doc)
+            if parent is not None:
+                parent.add_child(node)
+            parent = node
+        elif parent is None:
+            # create new top-level node
+            if self.options.root_page_id is not None:
+                page_id = self.options.root_page_id.page_id
+                parent = DocumentNode(local_dir, page_id=page_id)
+            else:
+                # local use only, raises error with remote synchronization
+                parent = DocumentNode(local_dir, page_id=None)
+        for file in files:
+            node = self._index_file(file)
+            parent.add_child(node)
         for directory in directories:
-            self._index_directory(directory, parent_id)
+            self._index_directory(directory, parent)
-    def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
+        return parent
+    def _index_file(self, path: Path) -> DocumentNode:
         """
         Indexes a single Markdown file.
         """
-        metadata = self._get_or_create_page(path, parent_id)
-        LOGGER.debug("Indexed %s with metadata: %s", path, metadata)
-        self.page_metadata[path] = metadata
+        LOGGER.info("Indexing file: %s", path)
+        # extract information from a Markdown document found in a local directory.
+        document = Scanner().read(path)
+        return DocumentNode(
+            absolute_path=path,
+            page_id=document.page_id,
+            space_key=document.space_key,
+            title=document.title,
+        )
+    def _generate_hash(self, absolute_path: Path) -> str:
+        """
+        Computes a digest to be used as a unique string.
+        """
+        relative_path = absolute_path.relative_to(self.root_dir)
+        hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
+        return "".join(f"{c:x}" for c in hash.digest())
 class ProcessorFactory:

md2conf/properties.py CHANGED Viewed

@@ -54,15 +54,28 @@ class ConfluenceSiteProperties:
         self.space_key = opt_space_key
-class ConfluenceConnectionProperties(ConfluenceSiteProperties):
-    "Properties related to connecting to Confluence."
+class ConfluenceConnectionProperties:
+    """
+    Properties related to connecting to Confluence.
+    :param api_url: Confluence API URL. Required for scoped tokens.
+    :param user_name: Confluence user name.
+    :param api_key: Confluence API key.
+    :param headers: Additional HTTP headers to pass to Confluence REST API calls.
+    """
+    domain: Optional[str]
+    base_path: Optional[str]
+    space_key: Optional[str]
+    api_url: Optional[str]
     user_name: Optional[str]
     api_key: str
     headers: Optional[dict[str, str]]
     def __init__(
         self,
+        *,
+        api_url: Optional[str] = None,
         domain: Optional[str] = None,
         base_path: Optional[str] = None,
         user_name: Optional[str] = None,
@@ -70,14 +83,20 @@ class ConfluenceConnectionProperties(ConfluenceSiteProperties):
         space_key: Optional[str] = None,
         headers: Optional[dict[str, str]] = None,
     ) -> None:
-        super().__init__(domain, base_path, space_key)
+        opt_api_url = api_url or os.getenv("CONFLUENCE_API_URL")
+        opt_domain = domain or os.getenv("CONFLUENCE_DOMAIN")
+        opt_base_path = base_path or os.getenv("CONFLUENCE_PATH")
+        opt_space_key = space_key or os.getenv("CONFLUENCE_SPACE_KEY")
         opt_user_name = user_name or os.getenv("CONFLUENCE_USER_NAME")
         opt_api_key = api_key or os.getenv("CONFLUENCE_API_KEY")
         if not opt_api_key:
             raise ArgumentError("Confluence API key not specified")
+        self.api_url = opt_api_url
+        self.domain = opt_domain
+        self.base_path = opt_base_path
+        self.space_key = opt_space_key
         self.user_name = opt_user_name
         self.api_key = opt_api_key
         self.headers = headers

md2conf/scanner.py CHANGED Viewed

@@ -9,15 +9,26 @@ Copyright 2022-2025, Levente Hunyadi
 import re
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, TypeVar
 import yaml
+from strong_typing.core import JsonType
+from strong_typing.serialization import DeserializerOptions, json_to_object
+T = TypeVar("T")
+def _json_to_object(
+    typ: type[T],
+    data: JsonType,
+) -> T:
+    return json_to_object(typ, data, options=DeserializerOptions(skip_unassigned=True))
 def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
     values: list[str] = []
-    def _repl_func(matchobj: re.Match) -> str:
+    def _repl_func(matchobj: re.Match[str]) -> str:
         values.append(matchobj.group(1))
         return ""
@@ -46,16 +57,27 @@ def extract_frontmatter_properties(text: str) -> tuple[Optional[dict[str, Any]],
     return properties, text
-def get_string(properties: dict[str, Any], key: str) -> Optional[str]:
-    value = properties.get(key)
-    if value is None:
-        return None
-    elif not isinstance(value, str):
-        raise ValueError(
-            f"expected dictionary value type of `str` for key `{key}`; got value of type `{type(value).__name__}`"
-        )
-    else:
-        return value
+@dataclass
+class DocumentProperties:
+    """
+    An object that holds properties extracted from the front-matter of a Markdown document.
+    :param page_id: Confluence page ID.
+    :param space_key: Confluence space key.
+    :param confluence_page_id: Confluence page ID. (Alternative name for JSON de-serialization.)
+    :param confluence_space_key: Confluence space key. (Alternative name for JSON de-serialization.)
+    :param generated_by: Text identifying the tool that generated the document.
+    :param title: The title extracted from front-matter.
+    :param tags: A list of tags (content labels) extracted from front-matter.
+    """
+    page_id: Optional[str]
+    space_key: Optional[str]
+    confluence_page_id: Optional[str]
+    confluence_space_key: Optional[str]
+    generated_by: Optional[str]
+    title: Optional[str]
+    tags: Optional[list[str]]
 @dataclass
@@ -67,6 +89,7 @@ class ScannedDocument:
     :param space_key: Confluence space key.
     :param generated_by: Text identifying the tool that generated the document.
     :param title: The title extracted from front-matter.
+    :param tags: A list of tags (content labels) extracted from front-matter.
     :param text: Text that remains after front-matter and inline properties have been extracted.
     """
@@ -74,6 +97,7 @@ class ScannedDocument:
     space_key: Optional[str]
     generated_by: Optional[str]
     title: Optional[str]
+    tags: Optional[list[str]]
     text: str
@@ -88,30 +112,38 @@ class Scanner:
             text = f.read()
         # extract Confluence page ID
-        page_id, text = extract_value(r"<!--\s+confluence-page-id:\s*(\d+)\s+-->", text)
+        page_id, text = extract_value(
+            r"<!--\s+confluence[-_]page[-_]id:\s*(\d+)\s+-->", text
+        )
         # extract Confluence space key
         space_key, text = extract_value(
-            r"<!--\s+confluence-space-key:\s*(\S+)\s+-->", text
+            r"<!--\s+confluence[-_]space[-_]key:\s*(\S+)\s+-->", text
         )
         # extract 'generated-by' tag text
-        generated_by, text = extract_value(r"<!--\s+generated-by:\s*(.*)\s+-->", text)
+        generated_by, text = extract_value(
+            r"<!--\s+generated[-_]by:\s*(.*)\s+-->", text
+        )
         title: Optional[str] = None
+        tags: Optional[list[str]] = None
         # extract front-matter
-        properties, text = extract_frontmatter_properties(text)
-        if properties is not None:
-            page_id = page_id or get_string(properties, "confluence-page-id")
-            space_key = space_key or get_string(properties, "confluence-space-key")
-            generated_by = generated_by or get_string(properties, "generated-by")
-            title = get_string(properties, "title")
+        data, text = extract_frontmatter_properties(text)
+        if data is not None:
+            p = _json_to_object(DocumentProperties, data)
+            page_id = page_id or p.confluence_page_id or p.page_id
+            space_key = space_key or p.confluence_space_key or p.space_key
+            generated_by = generated_by or p.generated_by
+            title = p.title
+            tags = p.tags
         return ScannedDocument(
             page_id=page_id,
             space_key=space_key,
             generated_by=generated_by,
             title=title,
+            tags=tags,
             text=text,
         )

markdown_to_confluence-0.3.5.dist-info/RECORD DELETED Viewed

@@ -1,23 +0,0 @@
-markdown_to_confluence-0.3.5.dist-info/licenses/LICENSE,sha256=Pv43so2bPfmKhmsrmXFyAvS7M30-1i1tzjz6-dfhyOo,1077
-md2conf/__init__.py,sha256=Uaqb3maQScpYs3FiH8kuM6pUh5JzE4Vy52MgU9pvMTw,402
-md2conf/__main__.py,sha256=bFcfmSnTWeuhmDm7bJ3jJabZ2S8W9biuAP6_R-Cc9As,8034
-md2conf/api.py,sha256=VxrAJ4yCsdGFVAEQQWw5aONwsMz0b6KvN4EMLXCKOwE,26905
-md2conf/application.py,sha256=SIM4yLHaLnvG7wRJLbRvptrkc0q4JMuAhDnanqsuYzA,6697
-md2conf/converter.py,sha256=ASXhs7g79dOU4x1QhfvKL8mtwth508GTGcb3AUHigC4,37286
-md2conf/emoji.py,sha256=48QJtOD0F3Be1laYLvAOwe0GxrJS-vcfjtCdiBsNcAc,1960
-md2conf/entities.dtd,sha256=M6NzqL5N7dPs_eUA_6sDsiSLzDaAacrx9LdttiufvYU,30215
-md2conf/local.py,sha256=998bBRpDAOywA-L0KD4_VyuL2Xftflv0ler-uNPQZn4,3866
-md2conf/matcher.py,sha256=y5WEZNklTpUoJtMJlulTvfhl_v-UMU6wySJAKit91ig,4940
-md2conf/mermaid.py,sha256=ZETocFDKi_fSYyVR1pJ7fo207YYFSuT44MSYFQ8-cZ0,2562
-md2conf/metadata.py,sha256=Xozg2PjJnis7VQYQT_edIvTb8u0cs_ZizPOAxc1N8vg,1003
-md2conf/processor.py,sha256=jSLFy8hqZJXf3b79jp31Fn9-cm4j9xq4HDChp9pyhP0,6706
-md2conf/properties.py,sha256=TOCXLdTfYkKjRwZaMgvXw0mNCI4opEUwpBXro2Kv2B4,2467
-md2conf/puppeteer-config.json,sha256=-dMTAN_7kNTGbDlfXzApl0KJpAWna9YKZdwMKbpOb60,159
-md2conf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-md2conf/scanner.py,sha256=iF8NCQAFO6Yut5aAQr7uxfWzVMMt9j3T5ADoVVSJWKQ,3543
-markdown_to_confluence-0.3.5.dist-info/METADATA,sha256=NiXwBXtQ5WhHce_JX7TBUSefQSR5jk5fERe46BL4vwE,18462
-markdown_to_confluence-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-markdown_to_confluence-0.3.5.dist-info/entry_points.txt,sha256=F1zxa1wtEObtbHS-qp46330WVFLHdMnV2wQ-ZorRmX0,50
-markdown_to_confluence-0.3.5.dist-info/top_level.txt,sha256=_FJfl_kHrHNidyjUOuS01ngu_jDsfc-ZjSocNRJnTzU,8
-markdown_to_confluence-0.3.5.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-markdown_to_confluence-0.3.5.dist-info/RECORD,,

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

{markdown_to_confluence-0.3.5.dist-info → markdown_to_confluence-0.4.0.dist-info}/zip-safe RENAMED Viewed

File without changes

markdown-to-confluence 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

markdown-to-confluence 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl