markdown-to-confluence 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/domain.py ADDED
@@ -0,0 +1,46 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Literal, Optional
11
+
12
+
13
@dataclass
class ConfluencePageID:
    """
    Wraps the identifier of a Confluence page.

    :param page_id: Confluence page ID, kept as a string.
    """

    page_id: str
16
+
17
+
18
@dataclass
class ConfluenceDocumentOptions:
    """
    Settings that govern how page content is generated.

    :param ignore_invalid_url: If true, an invalid URL in the input produces a warning and the anchor is replaced
        with plain text; if false, an exception is raised.
    :param heading_anchors: If true, a structured macro *anchor* is emitted for each section heading, with the
        identifier derived using GitHub conversion rules.
    :param generated_by: Text shown as the generated-by prompt (`None` omits the prompt).
    :param root_page_id: Confluence page that acts as the root page when publishing a directory of Markdown files.
    :param keep_hierarchy: Whether the source directory structure is maintained when exporting to Confluence.
    :param prefer_raster: Whether PNG files are chosen over SVG files when available.
    :param render_drawio: Whether draw.io diagrams are pre-rendered (or their pre-rendered version is used).
    :param render_mermaid: Whether Mermaid diagrams are pre-rendered into PNG/SVG images.
    :param diagram_output_format: Target image format for diagrams.
    :param webui_links: If true, relative URLs are converted to Confluence Web UI links.
    """

    # link and heading handling
    ignore_invalid_url: bool = False
    heading_anchors: bool = False

    # page-level settings
    generated_by: Optional[str] = "This page has been generated with a tool."
    root_page_id: Optional[ConfluencePageID] = None
    keep_hierarchy: bool = False

    # image and diagram handling
    prefer_raster: bool = True
    render_drawio: bool = False
    render_mermaid: bool = False
    diagram_output_format: Literal["png", "svg"] = "png"

    # link target style
    webui_links: bool = False
md2conf/drawio.py ADDED
@@ -0,0 +1,271 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import base64
10
+ import logging
11
+ import os
12
+ import os.path
13
+ import shutil
14
+ import subprocess
15
+ import typing
16
+ import zlib
17
+ from pathlib import Path
18
+ from struct import unpack
19
+ from urllib.parse import unquote_to_bytes
20
+
21
+ import lxml.etree as ET
22
+
23
+ LOGGER = logging.getLogger(__name__)
24
+
25
+
26
class DrawioError(ValueError):
    """
    Signals that input does not follow the draw.io document format, or that turning the input into a draw.io
    diagram failed.

    Typical causes:

    * the PNG file is invalid or corrupt
    * no PNG chunk with embedded diagram data exists
    * the outer XML does not have the expected structure
    * URL decoding fails
    * INFLATE decompression fails
    """
38
+
39
+
40
def inflate(data: bytes) -> bytes:
    """
    Expands a raw DEFLATE stream back into the original bytes.

    :param data: Payload compressed in raw DEFLATE format.
    :returns: The decompressed payload.
    """

    # a negative window size tells zlib to expect a raw DEFLATE stream (no zlib/gzip headers)
    raw_deflate_wbits = -zlib.MAX_WBITS
    return zlib.decompress(data, wbits=raw_deflate_wbits)
50
+
51
+
52
def decompress_diagram(xml_data: typing.Union[bytes, str]) -> ET._Element:
    """
    Decompresses the text content of the `<diagram>` element in a draw.io XML document.

    If the data is not compressed, the de-serialized XML element tree is returned.

    Expected input (as `bytes` or `str`):
    ```
    <mxfile>
        <diagram>... ENCODED_COMPRESSED_DATA ...</diagram>
    </mxfile>
    ```

    Output (as XML element tree):
    ```
    <mxfile>
        <diagram>
            <mxGraphModel>
                <root>
                ...
                </root>
            </mxGraphModel>
        </diagram>
    </mxfile>
    ```

    Only the first `<diagram>` child of `<mxfile>` is inspected and expanded.

    :param xml_data: The serialized XML document.
    :returns: XML element tree with the text contained within the `<diagram>` element expanded into a sub-tree.
    :raises DrawioError: The outer XML is invalid or has an unexpected structure, or the embedded data cannot be
        Base64-decoded, inflated, URL-decoded or parsed as XML.
    """

    try:
        root = ET.fromstring(xml_data)
    except (ET.ParseError, ValueError) as e:
        # lxml rejects a `str` that carries an XML encoding declaration with a plain `ValueError`
        # rather than a parse error; report both uniformly as a malformed document
        raise DrawioError("invalid outer XML") from e

    if root.tag != "mxfile":
        raise DrawioError("root element is not `<mxfile>`")

    diagram_elem = root.find("diagram")
    if diagram_elem is None:
        raise DrawioError("`<diagram>` element not found")

    if len(diagram_elem) > 0:
        # already decompressed
        return root

    # surrounding whitespace (e.g. from a pretty-printed document) is not part of the Base64 payload
    # but would be rejected by strict validation below
    payload = diagram_elem.text.strip() if diagram_elem.text is not None else ""
    if not payload:
        raise DrawioError("`<diagram>` element has no data")

    # reverse base64-encoding of inner data
    try:
        base64_decoded = base64.b64decode(payload, validate=True)
    except ValueError as e:
        raise DrawioError("raw text data in `<diagram>` element is not properly Base64-encoded") from e

    # decompress inner data
    try:
        embedded_data = inflate(base64_decoded)
    except zlib.error as e:
        raise DrawioError("`<diagram>` element text data cannot be decompressed using INFLATE") from e

    # reverse URL-encoding of inner data
    try:
        url_decoded = unquote_to_bytes(embedded_data)
    except ValueError as e:
        raise DrawioError("decompressed data in `<diagram>` element is not properly URL-encoded") from e

    # create sub-tree from decompressed data
    try:
        tree = ET.fromstring(url_decoded)
    except ET.ParseError as e:
        raise DrawioError("invalid inner XML extracted from `<diagram>` element") from e

    # update document: swap the encoded text for the expanded sub-tree
    diagram_elem.text = None
    diagram_elem.append(tree)

    return root
130
+
131
+
132
def extract_xml_from_png(png_data: bytes) -> ET._Element:
    """
    Extracts an editable draw.io diagram from a PNG file.

    Walks the PNG chunk sequence and uses the first `tEXt` chunk whose keyword is `mxfile`.

    :param png_data: PNG binary data, with an embedded draw.io diagram.
    :returns: XML element tree of a draw.io diagram.
    :raises DrawioError: The data is not a PNG file, the chunk structure is truncated or corrupt, or no chunk
        with embedded draw.io data is found.
    """

    # PNG signature is always the first 8 bytes
    png_signature = b"\x89PNG\r\n\x1a\n"
    if not png_data.startswith(png_signature):
        raise DrawioError("not a valid PNG file")

    offset = len(png_signature)
    while offset < len(png_data):
        if offset + 8 > len(png_data):
            raise DrawioError("corrupted PNG: incomplete chunk header")

        # read chunk length (4 bytes) and type (4 bytes)
        (length,) = unpack(">I", png_data[offset : offset + 4])
        chunk_type = png_data[offset + 4 : offset + 8]
        offset += 8

        if offset + length + 4 > len(png_data):
            # a corrupt file may hold non-ASCII bytes in the chunk type; escape them so that building the
            # message cannot fail with `UnicodeDecodeError` and hide the actual problem
            chunk_name = chunk_type.decode("ascii", errors="backslashreplace")
            raise DrawioError(f"corrupted PNG: incomplete data for chunk {chunk_name}")

        # read chunk data
        chunk_data = png_data[offset : offset + length]
        offset += length

        # skip CRC (4 bytes)
        offset += 4

        # extracts draw.io diagram data from a `tEXt` chunk with the keyword `mxfile` embedded in a PNG
        if chunk_type != b"tEXt":
            continue

        # format: keyword\0text
        null_pos = chunk_data.find(b"\x00")
        if null_pos < 0:
            raise DrawioError("corrupted PNG: tEXt chunk missing keyword")

        keyword = chunk_data[:null_pos].decode("latin1")
        if keyword != "mxfile":
            continue

        textual_data = chunk_data[null_pos + 1 :]

        try:
            url_decoded = unquote_to_bytes(textual_data)
        except ValueError as e:
            raise DrawioError("data in `tEXt` chunk is not properly URL-encoded") from e

        # decompress data embedded in the outer XML wrapper
        return decompress_diagram(url_decoded)

    # matching `tEXt` chunk not found
    raise DrawioError("not a PNG file made with draw.io")
190
+
191
+
192
def extract_xml_from_svg(svg_data: bytes) -> ET._Element:
    """
    Recovers the editable draw.io diagram stored in an SVG file.

    The diagram is read from the `content` attribute of the SVG root element.

    :param svg_data: SVG XML data, with an embedded draw.io diagram.
    :returns: XML element tree of a draw.io diagram.
    :raises DrawioError: The SVG cannot be parsed, or its root element has no `content` attribute.
    """

    try:
        svg_root = ET.fromstring(svg_data)
    except ET.ParseError as e:
        raise DrawioError("invalid SVG XML") from e

    embedded = svg_root.attrib.get("content")
    if embedded is not None:
        return decompress_diagram(embedded)

    raise DrawioError("SVG root element has no attribute `content`")
210
+
211
+
212
def extract_diagram(path: Path) -> bytes:
    """
    Extracts an editable draw.io diagram from a PNG or SVG file.

    The file type is selected by the file name suffix: `.drawio.png` or `.drawio.svg`.

    :param path: Path to a PNG or SVG file with an embedded draw.io diagram.
    :returns: XML data of a draw.io diagram as bytes.
    :raises DrawioError: The file name has neither of the recognized suffixes.
    """

    file_name = path.name
    if file_name.endswith(".drawio.png"):
        root = extract_xml_from_png(path.read_bytes())
    elif file_name.endswith(".drawio.svg"):
        root = extract_xml_from_svg(path.read_bytes())
    else:
        raise DrawioError(f"unrecognized file type for {path.name}")

    # serialize the expanded element tree back into XML bytes
    return ET.tostring(root, encoding="utf8", method="xml")
230
+
231
+
232
def render_diagram(source: Path, output_format: typing.Literal["png", "svg"] = "png") -> bytes:
    """
    Generates a PNG or SVG image from a draw.io diagram source.

    Runs the `draw.io` executable found on the search path in export mode, and reads back the image it writes.

    :param source: Path to the draw.io diagram to render.
    :param output_format: Image format to produce.
    :returns: Binary content of the rendered image.
    :raises DrawioError: The `draw.io` executable is not found, or it exits with a non-zero status.
    """

    import tempfile

    executable = shutil.which("draw.io")
    if executable is None:
        raise DrawioError("draw.io executable not found")

    # Write the output into a uniquely named scratch directory instead of a fixed file name, so that
    # concurrent runs cannot collide and an unrelated file with the same name is never overwritten or deleted.
    # The scratch directory stays under the working directory, where the output has always been written.
    with tempfile.TemporaryDirectory(prefix="tmp_drawio_", dir=os.getcwd()) as scratch_dir:
        target = os.path.join(scratch_dir, f"diagram.{output_format}")

        cmd = [executable, "--export", "--format", output_format, "--output", target]
        if output_format == "png":
            cmd.extend(["--scale", "2", "--transparent"])
        elif output_format == "svg":
            cmd.append("--embed-svg-images")
        cmd.append(str(source))

        LOGGER.debug("Executing: %s", " ".join(cmd))
        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=False)
        if proc.returncode:
            messages = [f"failed to convert draw.io diagram; exit code: {proc.returncode}"]
            # console output is only used for diagnostics; never let undecodable bytes hide the real failure
            console_output = proc.stdout.decode("utf-8", errors="replace")
            if console_output:
                messages.append(f"output:\n{console_output}")
            console_error = proc.stderr.decode("utf-8", errors="replace")
            if console_error:
                messages.append(f"error:\n{console_error}")
            raise DrawioError("\n".join(messages))

        with open(target, "rb") as f:
            return f.read()
md2conf/local.py CHANGED
@@ -11,7 +11,8 @@ import os
11
11
  from pathlib import Path
12
12
  from typing import Optional
13
13
 
14
- from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
14
+ from .converter import ConfluenceDocument
15
+ from .domain import ConfluenceDocumentOptions, ConfluencePageID
15
16
  from .extra import override
16
17
  from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
17
18
  from .processor import Converter, DocumentNode, Processor, ProcessorFactory
@@ -66,6 +67,7 @@ class LocalProcessor(Processor):
66
67
  page_id=page_id,
67
68
  space_key=node.space_key or self.site.space_key or "HOME",
68
69
  title=node.title or "",
70
+ synchronized=node.synchronized,
69
71
  ),
70
72
  )
71
73
 
@@ -76,10 +78,14 @@ class LocalProcessor(Processor):
76
78
  """
77
79
 
78
80
  content = document.xhtml()
79
- out_path = self.out_dir / path.relative_to(self.root_dir).with_suffix(".csf")
80
- os.makedirs(out_path.parent, exist_ok=True)
81
- with open(out_path, "w", encoding="utf-8") as f:
81
+ csf_path = self.out_dir / path.relative_to(self.root_dir).with_suffix(".csf")
82
+ csf_dir = csf_path.parent
83
+ os.makedirs(csf_dir, exist_ok=True)
84
+ with open(csf_path, "w", encoding="utf-8") as f:
82
85
  f.write(content)
86
+ for name, data in document.embedded_images.items():
87
+ with open(csf_dir / name, "wb") as f:
88
+ f.write(data)
83
89
 
84
90
 
85
91
  class LocalProcessorFactory(ProcessorFactory):
md2conf/markdown.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import xml.etree.ElementTree
10
+ from typing import Any, Optional
11
+
12
+ import markdown
13
+
14
+
15
+ def _emoji_generator(
16
+ index: str,
17
+ shortname: str,
18
+ alias: Optional[str],
19
+ uc: Optional[str],
20
+ alt: str,
21
+ title: Optional[str],
22
+ category: Optional[str],
23
+ options: dict[str, Any],
24
+ md: markdown.Markdown,
25
+ ) -> xml.etree.ElementTree.Element:
26
+ """
27
+ Custom generator for `pymdownx.emoji`.
28
+ """
29
+
30
+ name = (alias or shortname).strip(":")
31
+ span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
32
+ if uc is not None:
33
+ span.attrib["data-emoji-unicode"] = uc
34
+
35
+ # convert series of Unicode code point hexadecimal values into characters
36
+ span.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
37
+ else:
38
+ span.text = alt
39
+ return span
40
+
41
+
42
+ def _math_formatter(
43
+ source: str,
44
+ language: str,
45
+ css_class: str,
46
+ options: dict[str, Any],
47
+ md: markdown.Markdown,
48
+ classes: Optional[list[str]] = None,
49
+ id_value: str = "",
50
+ attrs: Optional[dict[str, str]] = None,
51
+ **kwargs: Any,
52
+ ) -> str:
53
+ """
54
+ Custom formatter for language `math` in `pymdownx.superfences`.
55
+ """
56
+
57
+ if classes is None:
58
+ classes = [css_class]
59
+ else:
60
+ classes.insert(0, css_class)
61
+
62
+ html_id = f' id="{id_value}"' if id_value else ""
63
+ html_class = ' class="{}"'.format(" ".join(classes))
64
+ html_attrs = " " + " ".join(f'{k}="{v}"' for k, v in attrs.items()) if attrs else ""
65
+
66
+ return f"<div{html_id}{html_class}{html_attrs}>{source}</div>"
67
+
68
+
69
# Python-Markdown converter instance shared by all conversions in this module.
_CONVERTER = markdown.Markdown(
    extensions=[
        "admonition",
        "footnotes",
        "markdown.extensions.tables",
        "md_in_html",
        "pymdownx.arithmatex",
        "pymdownx.emoji",
        "pymdownx.highlight",  # required by `pymdownx.superfences`
        "pymdownx.magiclink",
        "pymdownx.superfences",
        "pymdownx.tilde",
        "sane_lists",
    ],
    extension_configs={
        "footnotes": {
            "BACKLINK_TITLE": "",
        },
        "pymdownx.arithmatex": {
            "generic": True,
            "preview": False,
            # empty wrappers: emit the formula text without surrounding delimiters
            "tex_inline_wrap": ["", ""],
            "tex_block_wrap": ["", ""],
        },
        "pymdownx.emoji": {
            "emoji_generator": _emoji_generator,
        },
        "pymdownx.highlight": {
            "use_pygments": False,
        },
        "pymdownx.superfences": {
            # fenced blocks tagged `math` are routed to the custom formatter
            "custom_fences": [
                {
                    "name": "math",
                    "class": "arithmatex",
                    "format": _math_formatter,
                },
            ],
        },
    },
)
95
+
96
+
97
def markdown_to_html(content: str) -> str:
    """
    Turns a Markdown document into XHTML using Python-Markdown.

    :param content: Markdown input as a string.
    :returns: XHTML output as a string.
    :see: https://python-markdown.github.io/
    """

    # the converter instance is shared, so reset it before each document
    _CONVERTER.reset()
    return _CONVERTER.convert(content)
md2conf/matcher.py CHANGED
@@ -10,14 +10,57 @@ import os.path
10
10
  from dataclasses import dataclass
11
11
  from fnmatch import fnmatch
12
12
  from pathlib import Path
13
- from typing import Iterable, Optional, Union, overload
13
+ from typing import Iterable, Optional, Union, final, overload
14
14
 
15
15
 
16
- @dataclass(frozen=True)
16
+ @dataclass(frozen=True, eq=True)
17
+ class _BaseEntry:
18
+ """
19
+ Represents a file or directory entry.
20
+
21
+ Entries are primarily sorted alphabetically case-insensitive.
22
+ When two items are equal case-insensitive, conflicting items are put in case-sensitive order.
23
+
24
+ :param name: Name of the file-system entry.
25
+ """
26
+
27
+ name: str
28
+
29
+ @property
30
+ def lower_name(self) -> str:
31
+ return self.name.lower()
32
+
33
+ def __lt__(self, other: "_BaseEntry") -> bool:
34
+ return (self.lower_name, self.name) < (other.lower_name, other.name)
35
+
36
+ def __le__(self, other: "_BaseEntry") -> bool:
37
+ return (self.lower_name, self.name) <= (other.lower_name, other.name)
38
+
39
+ def __ge__(self, other: "_BaseEntry") -> bool:
40
+ return (self.lower_name, self.name) >= (other.lower_name, other.name)
41
+
42
+ def __gt__(self, other: "_BaseEntry") -> bool:
43
+ return (self.lower_name, self.name) > (other.lower_name, other.name)
44
+
45
+
46
@final
class FileEntry(_BaseEntry):
    """
    Entry type that adds no fields or behavior to `_BaseEntry`.

    NOTE(review): presumably denotes a regular file, as opposed to `DirectoryEntry` — confirm against callers.
    """
49
+
50
+
51
@final
class DirectoryEntry(_BaseEntry):
    """
    Entry type that adds no fields or behavior to `_BaseEntry`.

    NOTE(review): presumably denotes a directory, as opposed to `FileEntry` — confirm against callers.
    """
54
+
55
+
56
+ @dataclass(frozen=True, eq=True)
17
57
  class Entry:
18
58
  """
19
59
  Represents a file or directory entry.
20
60
 
61
+ When sorted, directories come before files and items are primarily arranged in alphabetical order case-insensitive.
62
+ When two items are equal case-insensitive, conflicting items are put in case-sensitive order.
63
+
21
64
  :param name: Name of the file-system entry to match against the rule-set.
22
65
  :param is_dir: True if the entry is a directory.
23
66
  """
@@ -25,6 +68,22 @@ class Entry:
25
68
  name: str
26
69
  is_dir: bool
27
70
 
71
+ @property
72
+ def lower_name(self) -> str:
73
+ return self.name.lower()
74
+
75
+ def __lt__(self, other: "Entry") -> bool:
76
+ return (not self.is_dir, self.lower_name, self.name) < (not other.is_dir, other.lower_name, other.name)
77
+
78
+ def __le__(self, other: "Entry") -> bool:
79
+ return (not self.is_dir, self.lower_name, self.name) <= (not other.is_dir, other.lower_name, other.name)
80
+
81
+ def __ge__(self, other: "Entry") -> bool:
82
+ return (not self.is_dir, self.lower_name, self.name) >= (not other.is_dir, other.lower_name, other.name)
83
+
84
+ def __gt__(self, other: "Entry") -> bool:
85
+ return (not self.is_dir, self.lower_name, self.name) > (not other.is_dir, other.lower_name, other.name)
86
+
28
87
 
29
88
  @dataclass
30
89
  class MatcherOptions:
@@ -146,9 +205,9 @@ class Matcher:
146
205
  :returns: A filtered list of names that didn't match any of the exclusion rules.
147
206
  """
148
207
 
149
- return [entry for entry in entries if self.is_included(entry)]
208
+ return sorted(entry for entry in entries if self.is_included(entry))
150
209
 
151
- def scandir(self, path: Path) -> list[Entry]:
210
+ def listing(self, path: Path) -> list[Entry]:
152
211
  """
153
212
  Returns only those entries in a directory whose name doesn't match any of the exclusion rules.
154
213
 
md2conf/metadata.py CHANGED
@@ -33,8 +33,10 @@ class ConfluencePageMetadata:
33
33
  :param page_id: Confluence page ID.
34
34
  :param space_key: Confluence space key.
35
35
  :param title: Document title.
36
+ :param synchronized: True if the document content is parsed and synchronized with Confluence.
36
37
  """
37
38
 
38
39
  page_id: str
39
40
  space_key: str
40
41
  title: str
42
+ synchronized: bool