PyPI - markdown-to-confluence - Versions diffs - 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl - Mend

markdown-to-confluence 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/METADATA +275 -208
markdown_to_confluence-0.5.5.dist-info/RECORD +57 -0
{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/WHEEL +1 -1
md2conf/__init__.py +1 -1
md2conf/__main__.py +61 -189
md2conf/api.py +35 -69
md2conf/attachment.py +4 -3
md2conf/clio.py +226 -0
md2conf/compatibility.py +5 -0
md2conf/converter.py +239 -147
md2conf/csf.py +89 -9
md2conf/drawio/extension.py +3 -3
md2conf/drawio/render.py +2 -0
md2conf/extension.py +4 -0
md2conf/external.py +25 -8
md2conf/frontmatter.py +18 -6
md2conf/image.py +17 -14
md2conf/latex.py +8 -1
md2conf/markdown.py +68 -1
md2conf/mermaid/render.py +1 -1
md2conf/options.py +95 -24
md2conf/plantuml/extension.py +7 -7
md2conf/plantuml/render.py +6 -7
md2conf/png.py +10 -6
md2conf/processor.py +24 -3
md2conf/publisher.py +193 -36
md2conf/reflection.py +74 -0
md2conf/scanner.py +16 -6
md2conf/serializer.py +12 -1
md2conf/svg.py +131 -109
md2conf/toc.py +72 -0
md2conf/xml.py +45 -0
markdown_to_confluence-0.5.3.dist-info/RECORD +0 -55
{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe +0 -0
/md2conf/{puppeteer-config.json → mermaid/puppeteer-config.json} +0 -0

md2conf/plantuml/extension.py CHANGED Viewed

@@ -19,7 +19,7 @@ from md2conf.compatibility import override, path_relative_to
 from md2conf.csf import AC_ATTR, AC_ELEM
 from md2conf.extension import MarketplaceExtension
 from md2conf.formatting import ImageAttributes
-from md2conf.svg import get_svg_dimensions_from_bytes
+from md2conf.svg import get_svg_dimensions
 from .config import PlantUMLConfigProperties
 from .render import compress_plantuml_data, has_plantuml, render_diagram
@@ -87,7 +87,7 @@ class PlantUMLExtension(MarketplaceExtension):
                 image_data = render_diagram(content, "svg", config=config)
                 # extract dimensions from SVG
-                width, height = get_svg_dimensions_from_bytes(image_data)
+                dimensions = get_svg_dimensions(image_data)
                 # generate SVG filename and add as attachment
                 if relative_path is not None:
@@ -98,11 +98,11 @@ class PlantUMLExtension(MarketplaceExtension):
                     svg_filename = attachment_name(f"embedded_{plantuml_hash}.svg")
                     self.attachments.add_embed(svg_filename, EmbeddedFileData(image_data))
-                return self._create_plantuml_macro(content, svg_filename, width, height)
+                return self._create_plantuml_macro(content, svg_filename, dimensions)
             else:
                 return self._create_plantuml_macro(content)
-    def _create_plantuml_macro(self, source: str, filename: str | None = None, width: int | None = None, height: int | None = None) -> ElementType:
+    def _create_plantuml_macro(self, source: str, filename: str | None = None, dimensions: tuple[int, int] | None = None) -> ElementType:
         """
         A PlantUML diagram using a `structured-macro` with embedded data.
@@ -128,7 +128,8 @@ class PlantUMLExtension(MarketplaceExtension):
             parameters.append(AC_ELEM("parameter", {AC_ATTR("name"): "filename"}, filename))
         # add optional dimension parameters if available
-        if width is not None:
+        if dimensions is not None:
+            width, height = dimensions
             parameters.append(
                 AC_ELEM(
                     "parameter",
@@ -136,7 +137,6 @@ class PlantUMLExtension(MarketplaceExtension):
                     str(width),
                 )
             )
-        if height is not None:
             parameters.append(
                 AC_ELEM(
                     "parameter",
@@ -148,7 +148,7 @@ class PlantUMLExtension(MarketplaceExtension):
         return AC_ELEM(
             "structured-macro",
             {
-                AC_ATTR("name"): "plantumlcloud",
+                AC_ATTR("name"): "plantumlcloud",  # spellchecker:disable-line
                 AC_ATTR("schema-version"): "1",
                 "data-layout": "default",
                 AC_ATTR("local-id"): local_id,

md2conf/plantuml/render.py CHANGED Viewed

@@ -92,17 +92,16 @@ def render_diagram(
     if config is None:
         config = PlantUMLConfigProperties()
-    # Build command for PlantUML with pipe mode
-    # -pipe: read from stdin and write to stdout
-    # -t<format>: output format (png or svg)
-    # -charset utf-8: ensure UTF-8 encoding
+    # command for PlantUML with pipe mode
     cmd = _get_plantuml_command()
     cmd.extend(
         [
-            "-pipe",
-            f"-t{output_format}",
-            "-charset",
+            "--charset",
             "utf-8",
+            "--format",
+            output_format,
+            "--no-error-image",
+            "--pipe",
         ]
     )

md2conf/png.py CHANGED Viewed

@@ -12,6 +12,10 @@ from struct import unpack
 from typing import BinaryIO, Iterable, overload
+class ImageFormatError(RuntimeError):
+    pass
 class _Chunk:
     "Data chunk in binary data as per the PNG image format."
@@ -34,7 +38,7 @@ def _read_signature(f: BinaryIO) -> None:
     signature = f.read(8)
     if signature != b"\x89PNG\r\n\x1a\n":
-        raise ValueError("not a valid PNG file")
+        raise ImageFormatError("not a valid PNG file")
 def _read_chunk(f: BinaryIO) -> _Chunk | None:
@@ -45,7 +49,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
         return None
     if len(length_bytes) != 4:
-        raise ValueError("expected: 4 bytes storing chunk length")
+        raise ImageFormatError("expected: 4 bytes storing chunk length")
     length = int.from_bytes(length_bytes, "big")
@@ -53,7 +57,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
     data_bytes = f.read(data_length)
     actual_length = len(data_bytes)
     if actual_length != data_length:
-        raise ValueError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
+        raise ImageFormatError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
     chunk_type = data_bytes[0:4]
     chunk_data = data_bytes[4:-4]
@@ -75,12 +79,12 @@ def _extract_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
     # validate IHDR (Image Header) chunk
     ihdr = _read_chunk(source_file)
     if ihdr is None:
-        raise ValueError("missing IHDR chunk")
+        raise ImageFormatError("missing IHDR chunk")
     if ihdr.length != 13:
-        raise ValueError("invalid chunk length")
+        raise ImageFormatError("invalid chunk length")
     if ihdr.name != b"IHDR":
-        raise ValueError(f"expected: IHDR chunk; got: {ihdr.name!r}")
+        raise ImageFormatError(f"expected: IHDR chunk; got: {ihdr.name!r}")
     (
         width,

md2conf/processor.py CHANGED Viewed

@@ -15,11 +15,12 @@ from typing import Iterable
 from .collection import ConfluencePageCollection
 from .converter import ConfluenceDocument
-from .environment import ArgumentError
+from .environment import ArgumentError, PageError
 from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
 from .metadata import ConfluenceSiteMetadata
 from .options import ConfluencePageID, DocumentOptions
 from .scanner import Scanner
+from .toc import unique_title
 LOGGER = logging.getLogger(__name__)
@@ -143,6 +144,22 @@ class Processor:
         Processes a sub-tree rooted at an ancestor node.
         """
+        # verify if pages have a unique title to avoid overwrites within synchronized set
+        title_to_path: dict[str, Path] = {}
+        duplicates: set[Path] = set()
+        for node in root.all():
+            if node.title is not None:
+                path = title_to_path.get(node.title)
+                if path is not None:
+                    duplicates.add(path)
+                    duplicates.add(node.absolute_path)
+                else:
+                    title_to_path[node.title] = node.absolute_path
+        if duplicates:
+            raise PageError(
+                f"expected: each synchronized page to have a unique title but duplicates found in files: {', '.join(str(p) for p in sorted(list(duplicates)))}"
+            )
         # synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
         self._synchronize_tree(root, self.options.root_page_id)
@@ -246,14 +263,18 @@ class Processor:
         LOGGER.info("Indexing file: %s", path)
         # extract information from a Markdown document found in a local directory.
-        document = Scanner().read(path)
+        with open(path, "r", encoding="utf-8") as f:
+            text = f.read()
+        document = Scanner().parse(text)
         props = document.properties
+        title = props.title or unique_title(text)
         return DocumentNode(
             absolute_path=path,
             page_id=props.page_id,
             space_key=props.space_key,
-            title=props.title,
+            title=title,
             synchronized=props.synchronized if props.synchronized is not None else True,
         )

md2conf/publisher.py CHANGED Viewed

@@ -6,23 +6,101 @@ Copyright 2022-2026, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
+import hashlib
 import logging
+from dataclasses import dataclass
 from pathlib import Path
-from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
+from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluencePage, ConfluenceSession, ConfluenceStatus
 from .attachment import attachment_name
 from .compatibility import override, path_relative_to
-from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
+from .converter import ConfluenceDocument, ElementType, get_volatile_attributes, get_volatile_elements
 from .csf import AC_ATTR, elements_from_string
 from .environment import PageError
 from .metadata import ConfluencePageMetadata
 from .options import ConfluencePageID, DocumentOptions
 from .processor import Converter, DocumentNode, Processor, ProcessorFactory
+from .serializer import json_to_object, object_to_json
 from .xml import is_xml_equal, unwrap_substitute
 LOGGER = logging.getLogger(__name__)
+CONTENT_PROPERTY_TAG = "md2conf"
+class _MissingType:
+    pass
+_MissingDefault = _MissingType()
+class ParentCatalog:
+    "Maintains a catalog of child-parent relationships."
+    _api: ConfluenceSession
+    _child_to_parent: dict[str, str | None]
+    _known: set[str]
+    def __init__(self, api: ConfluenceSession) -> None:
+        self._api = api
+        self._child_to_parent = {}
+        self._known = set()
+    def add_known(self, page_id: str) -> None:
+        """
+        Adds a new well-known page such as the root page or a page paired with a Markdown file using an explicit page ID.
+        """
+        self._known.add(page_id)
+    def add_parent(self, *, page_id: str, parent_id: str | None) -> None:
+        """
+        Adds a new child-parent relationship.
+        This method is useful to persist information acquired by a previous API call.
+        """
+        self._child_to_parent[page_id] = parent_id
+    def is_traceable(self, page_id: str) -> bool:
+        """
+        Verifies if a page traces back to a well-known root page.
+        :param page_id: The page to check.
+        """
+        if page_id in self._known:
+            return True
+        known_parent_id = self._child_to_parent.get(page_id, _MissingDefault)
+        if not isinstance(known_parent_id, _MissingType):
+            parent_id = known_parent_id
+        else:
+            page = self._api.get_page_properties(page_id)
+            parent_id = page.parentId
+            self._child_to_parent[page_id] = parent_id
+        if parent_id is None:
+            return False
+        return self.is_traceable(parent_id)
+@dataclass
+class ConfluenceMarkdownTag:
+    """
+    Captures information used to synchronize the Markdown source file with the Confluence target page.
+    :param page_version: Confluence page version number when the page was last synchronized.
+    :param source_digest: MD5 hash computed from the Markdown source file.
+    """
+    page_version: int
+    source_digest: str
 class SynchronizingProcessor(Processor):
     """
     Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -59,14 +137,18 @@ class SynchronizingProcessor(Processor):
         elif root_id is not None:
             real_id = root_id
         else:
-            raise NotImplementedError("condition not exhaustive")
+            raise NotImplementedError("condition not exhaustive for synchronizing tree")
-        self._synchronize_subtree(tree, real_id)
+        catalog = ParentCatalog(self.api)
+        catalog.add_known(real_id.page_id)
+        self._synchronize_subtree(tree, real_id, catalog)
-    def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID) -> None:
+    def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID, catalog: ParentCatalog) -> None:
         if node.page_id is not None:
             # verify if page exists
             page = self.api.get_page_properties(node.page_id)
+            catalog.add_known(page.id)
+            catalog.add_parent(page_id=page.id, parent_id=page.parentId)
             update = False
         else:
             if node.title is not None:
@@ -77,20 +159,26 @@ class SynchronizingProcessor(Processor):
                 digest = self._generate_hash(node.absolute_path)
                 title = f"{node.absolute_path.stem} [{digest}]"
-            if self.options.title_prefix is not None:
-                title = f"{self.options.title_prefix} {title}"
+            title = self._get_extended_title(title)
             # look up page by (possibly auto-generated) title
             page = self.api.get_or_create_page(title, parent_id.page_id)
+            catalog.add_parent(page_id=page.id, parent_id=page.parentId)
             if page.status is ConfluenceStatus.ARCHIVED:
-                # user has archived a page with this (auto-generated) title
-                raise PageError(f"unable to update archived page with ID {page.id}")
+                # user has archived a page with this (possibly auto-generated) title
+                raise PageError(f"unable to update archived page with ID {page.id} when synchronizing {node.absolute_path}")
+            if not catalog.is_traceable(page.id):
+                raise PageError(
+                    f"expected: page with ID {page.id} to be a descendant of the root page or one of the pages paired with a Markdown file using an explicit "
+                    f"page ID when synchronizing {node.absolute_path}"
+                )
             update = True
         space_key = self.api.space_id_to_key(page.spaceId)
-        if update:
+        if update and not self.options.skip_update:
             self._update_markdown(
                 node.absolute_path,
                 page_id=page.id,
@@ -106,7 +194,7 @@ class SynchronizingProcessor(Processor):
         self.page_metadata.add(node.absolute_path, data)
         for child_node in node.children():
-            self._synchronize_subtree(child_node, ConfluencePageID(page.id))
+            self._synchronize_subtree(child_node, ConfluencePageID(page.id), catalog)
     @override
     def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
@@ -136,48 +224,117 @@ class SynchronizingProcessor(Processor):
         content = document.xhtml()
         LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
-        title = None
-        if document.title is not None:
-            meta = self.page_metadata.get(path)
-            if meta is not None and meta.title != document.title:
-                conflicting_page_id = self.api.page_exists(document.title, space_id=self.api.space_key_to_id(meta.space_key))
-                if conflicting_page_id is None:
-                    title = document.title
-                else:
-                    LOGGER.info(
-                        "Document title of %s conflicts with Confluence page title of %s",
-                        path,
-                        conflicting_page_id,
-                    )
+        # compute content hash to help detect if document has changed
+        m = hashlib.md5()
+        with open(path, "rb") as f:
+            m.update(f.read())
+        source_digest = m.hexdigest()
+        # set Confluence title based on Markdown content
+        title = self._get_unique_title(document, path)
         # fetch existing page
         page = self.api.get_page(page_id.page_id)
+        prop = self.api.get_content_property_for_page(page_id.page_id, CONTENT_PROPERTY_TAG)
+        tag: ConfluenceMarkdownTag | None = None
+        if prop is not None:
+            try:
+                tag = json_to_object(ConfluenceMarkdownTag, prop.value)
+                LOGGER.debug("Page with ID %s has last synchronized version of %d and hash of %s", page.id, tag.page_version, tag.source_digest)
+            except Exception:
+                pass
+        # keep existing Confluence title if cannot infer meaningful title from Markdown source
         if not title:  # empty or `None`
             title = page.title
+        # synchronize page if page has any changes
+        if self._has_changes(page, tag, title, document.root, source_digest):
+            if tag is not None and page.version.number != tag.page_version:
+                LOGGER.warning("Page with ID %s has been edited since last synchronized: %s", page.id, page.title)
+            relative_path = path_relative_to(path, self.root_dir)
+            version = page.version.number + 1
+            self.api.update_page(page.id, content, title=title, version=version, message=f"Synchronized by md2conf from Markdown file: {relative_path}")
+        else:
+            version = page.version.number
+        if document.labels is not None:
+            self.api.update_labels(
+                page.id,
+                [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
+            )
+        props = [ConfluenceContentProperty(CONTENT_PROPERTY_TAG, object_to_json(ConfluenceMarkdownTag(version, source_digest)))]
+        if document.properties is not None:
+            props.extend(ConfluenceContentProperty(key, value) for key, value in document.properties.items())
+            self.api.update_content_properties_for_page(page.id, props)
+        else:
+            if tag is None or tag.page_version != version:
+                self.api.update_content_properties_for_page(page.id, props, keep_existing=True)
+    def _has_changes(self, page: ConfluencePage, tag: ConfluenceMarkdownTag | None, title: str, root: ElementType, source_digest: str) -> bool:
+        "True if the Confluence Storage Format content generated from the Markdown source file matches the Confluence target page content."
+        if page.title != title:
+            LOGGER.info("Detected page with new title: %s", page.id)
+            return True
+        if tag is not None and tag.source_digest != source_digest:
+            LOGGER.info("Detected page with updated Markdown source: %s", page.id)
+            return True
         # discard comments
         tree = elements_from_string(page.content)
         unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)
-        # check if page has any changes
-        if page.title != title or not is_xml_equal(
-            document.root,
+        # visit XML nodes recursively
+        if not is_xml_equal(
+            root,
             tree,
             skip_attributes=get_volatile_attributes(),
             skip_elements=get_volatile_elements(),
         ):
-            self.api.update_page(page_id.page_id, content, title=title, version=page.version.number + 1)
+            LOGGER.info("Detected page with updated Markdown content: %s", page.id)
+            return True
+        LOGGER.info("Up-to-date page: %s", page.id)
+        return False
+    def _get_extended_title(self, title: str) -> str:
+        """
+        Returns a title with the title prefix applied (if any).
+        """
+        if self.options.title_prefix is not None:
+            return f"{self.options.title_prefix} {title}"
         else:
-            LOGGER.info("Up-to-date page: %s", page_id.page_id)
+            return title
-        if document.labels is not None:
-            self.api.update_labels(
-                page_id.page_id,
-                [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
-            )
+    def _get_unique_title(self, document: ConfluenceDocument, path: Path) -> str | None:
+        """
+        Determines the (new) document title to assign to the Confluence page.
-        if document.properties is not None:
-            self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
+        Ensures that the title is unique across the Confluence space.
+        """
+        # document has no title (neither in front-matter nor as unique top-level heading)
+        if document.title is None:
+            return None
+        # add configured title prefix
+        title = self._get_extended_title(document.title)
+        # compare current document title with title discovered during directory traversal
+        meta = self.page_metadata.get(path)
+        if meta is not None and meta.title != title:
+            # title has changed, check if new title is available
+            page_id = self.api.page_exists(title, space_id=self.api.space_key_to_id(meta.space_key))
+            if page_id is not None:
+                LOGGER.info("Unrelated Confluence page with ID %s has the same inferred title as the Markdown file: %s", page_id, path)
+                return None
+        return title
     def _update_markdown(self, path: Path, *, page_id: str, space_key: str) -> None:
         """

md2conf/reflection.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""
+Publish Markdown files to Confluence wiki.
+Copyright 2022-2026, Levente Hunyadi
+:see: https://github.com/hunyadi/md2conf
+"""
+from collections.abc import Sequence
+from dataclasses import fields, is_dataclass
+from types import NoneType, UnionType
+from typing import Any, Literal, Union, get_args, get_origin, get_type_hints
+def get_nested_types(items: Sequence[Any]) -> set[type[Any]]:
+    "Returns a set of types that are directly or indirectly referenced by any of the specified items."
+    tps: set[type[Any]] = set()
+    for item in items:
+        tps.update(_get_nested_types(item))
+    return tps
+def _get_nested_types(tp: Any) -> set[type[Any]]:
+    tps: set[type[Any]] = set()
+    if tp is not None and tp is not NoneType:
+        origin = get_origin(tp)
+        if origin is list:
+            (item_type,) = get_args(tp)
+            tps.update(_get_nested_types(item_type))
+        elif origin is dict:
+            key_type, value_type = get_args(tp)
+            tps.update(_get_nested_types(key_type))
+            tps.update(_get_nested_types(value_type))
+        elif origin is set:
+            (elem_type,) = get_args(tp)
+            tps.update(_get_nested_types(elem_type))
+        elif origin is UnionType or origin is Union:
+            for union_arg in get_args(tp):
+                tps.update(_get_nested_types(union_arg))
+        elif isinstance(tp, type):
+            tps.add(tp)
+            if is_dataclass(tp):
+                for field in fields(tp):
+                    tps.update(_get_nested_types(field.type))
+            elif isinstance(tp, type):  # required to please static type checkers
+                for field_type in get_type_hints(tp).values():
+                    tps.update(_get_nested_types(field_type))
+    return tps
+def format_initializer(tp: Any) -> str:
+    "Prints an initialization value for a type."
+    origin = get_origin(tp)
+    if tp is None or tp is NoneType:
+        return "None"
+    elif origin is list:
+        (item_type,) = get_args(tp)
+        return f"[{format_initializer(item_type)}]"
+    elif origin is dict:
+        key_type, value_type = get_args(tp)
+        return f"{{{format_initializer(key_type)}: {format_initializer(value_type)}}}"
+    elif origin is set:
+        (elem_type,) = get_args(tp)
+        return f"[{format_initializer(elem_type)}]"
+    elif origin is Literal:
+        return " or ".join(repr(arg) for arg in get_args(tp))
+    elif origin is UnionType or origin is Union:
+        return " or ".join(format_initializer(arg) for arg in get_args(tp))
+    elif isinstance(tp, type):
+        return f"{tp.__name__}()"
+    else:
+        return "..."

md2conf/scanner.py CHANGED Viewed

@@ -63,10 +63,12 @@ class ScannedDocument:
     :param properties: Properties extracted from the front-matter of a Markdown document.
     :param text: Text that remains after front-matter and inline properties have been extracted.
+    :param start_line_number: The first line of the Markdown document excluding front-matter, or 1 if there is no front-matter.
     """
     properties: DocumentProperties
     text: str
+    start_line_number: int
 class Scanner:
@@ -75,10 +77,16 @@ class Scanner:
         Extracts essential properties from a Markdown document.
         """
-        # parse file
         with open(absolute_path, "r", encoding="utf-8") as f:
             text = f.read()
+        return self.parse(text)
+    def parse(self, text: str) -> ScannedDocument:
+        """
+        Extracts essential properties from a Markdown document.
+        """
         # extract Confluence page ID
         page_id, text = extract_value(r"<!--\s+confluence[-_]page[-_]id:\s*(\d+)\s+-->", text)
@@ -91,16 +99,18 @@ class Scanner:
         body_props = DocumentProperties(page_id=page_id, space_key=space_key, generated_by=generated_by)
         # extract front-matter
-        data, text = extract_frontmatter_json(text)
-        if data is not None:
-            frontmatter_props = json_to_object(DocumentProperties, data)
-            alias_props = json_to_object(AliasProperties, data)
+        frontmatter, text = extract_frontmatter_json(text)
+        if frontmatter is not None:
+            frontmatter_props = json_to_object(DocumentProperties, frontmatter.data)
+            alias_props = json_to_object(AliasProperties, frontmatter.data)
             if alias_props.confluence_page_id is not None:
                 frontmatter_props.page_id = alias_props.confluence_page_id
             if alias_props.confluence_space_key is not None:
                 frontmatter_props.space_key = alias_props.confluence_space_key
             props = coalesce(body_props, frontmatter_props)
+            start_line_number = frontmatter.outer_line_count + 1
         else:
             props = body_props
+            start_line_number = 1
-        return ScannedDocument(properties=props, text=text)
+        return ScannedDocument(properties=props, text=text, start_line_number=start_line_number)

md2conf/serializer.py CHANGED Viewed

@@ -8,7 +8,7 @@ Copyright 2022-2026, Levente Hunyadi
 import sys
 from datetime import datetime
-from typing import TypeVar
+from typing import TypeVar, cast
 from cattrs.preconf.orjson import make_converter  # spellchecker:disable-line
@@ -53,6 +53,17 @@ def json_to_object(typ: type[T], data: JsonType) -> T:
     return _converter.structure(data, typ)
+def object_to_json(data: object) -> JsonType:
+    """
+    Converts a structured object to a JSON object, ready to be serialized to a JSON string.
+    :param data: Python object to convert to a JSON object.
+    :returns: JSON object, ready to be serialized to a JSON encoded in UTF-8.
+    """
+    return cast(JsonType, _converter.unstructure(data))
 def object_to_json_payload(data: object) -> bytes:
     """
     Converts a structured object to a JSON string encoded in UTF-8.

markdown-to-confluence 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

markdown-to-confluence 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl