markdown-to-confluence 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/csf.py ADDED
@@ -0,0 +1,151 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import importlib.resources as resources
10
+ import re
11
+ from pathlib import Path
12
+ from typing import Callable, TypeVar
13
+
14
+ import lxml.etree as ET
15
+ from lxml.builder import ElementMaker
16
+
17
# XML namespaces typically associated with Confluence Storage Format documents
_namespaces = {
    "ac": "http://atlassian.com/content",
    "ri": "http://atlassian.com/resource/identifier",
}
# register each prefix with lxml when this module is imported (module-level side effect)
for key, value in _namespaces.items():
    ET.register_namespace(key, value)

# element factories: `HTML` builds elements without a namespace,
# `AC_ELEM` and `RI_ELEM` build elements in the `ac` and `ri` namespaces, respectively
HTML = ElementMaker()
AC_ELEM = ElementMaker(namespace=_namespaces["ac"])
RI_ELEM = ElementMaker(namespace=_namespaces["ri"])
28
+
29
+
30
class ParseError(RuntimeError):
    "Raised when content cannot be parsed into an XML element tree."
32
+
33
+
34
def _qname(namespace_uri: str, name: str) -> str:
    "Returns the text form of the qualified name built from a namespace URI and a local name."

    qualified = ET.QName(namespace_uri, name)
    return qualified.text
36
+
37
+
38
def AC_ATTR(name: str) -> str:
    "Returns the qualified name of an attribute in the `ac` namespace."

    ac_uri = _namespaces["ac"]
    return _qname(ac_uri, name)
40
+
41
+
42
def RI_ATTR(name: str) -> str:
    "Returns the qualified name of an attribute in the `ri` namespace."

    ri_uri = _namespaces["ri"]
    return _qname(ri_uri, name)
44
+
45
+
46
+ R = TypeVar("R")
47
+
48
+
49
def with_entities(func: Callable[[Path], R]) -> R:
    """
    Invokes a callable in the context of an entity definition file.

    The packaged resource `entities.dtd` is made available as a file system path for the duration of the call.

    :param func: Callable that receives the path to the entity definition file.
    :returns: Whatever the callable returns.
    """

    dtd_resource = resources.files(__package__).joinpath("entities.dtd")
    with resources.as_file(dtd_resource) as dtd_file:
        # the path is only guaranteed to exist inside this `with` block
        outcome = func(dtd_file)
        return outcome
55
+
56
+
57
def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
    """
    Creates an XML document tree from XML fragment strings.

    This function
    * adds an XML declaration,
    * adds a document type declaration that points to the entity definition file,
    * wraps the content in a root element,
    * adds namespace declarations associated with Confluence documents.

    :param dtd_path: Path to a DTD document that defines entities like `&cent;` or `&copy;`.
    :param items: Strings to parse into XML fragments.
    :returns: An XML document as an element tree.
    :raises ParseError: The parser reported a syntax error for the assembled document.
    """

    parser = ET.XMLParser(
        remove_blank_text=True,
        remove_comments=True,
        strip_cdata=False,
        load_dtd=True,
    )

    ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in _namespaces.items())

    data = [
        '<?xml version="1.0"?>',
        f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
    ]
    data.extend(items)
    data.append("</root>")

    try:
        return ET.fromstringlist(data, parser=parser)
    except ET.XMLSyntaxError as ex:
        # carry the parser diagnostic in the message so that `str(error)` is informative
        # even when the chained cause is not inspected or logged
        raise ParseError(str(ex)) from ex
91
+
92
+
93
def elements_from_strings(items: list[str]) -> ET._Element:
    """
    Creates a Confluence Storage Format XML document tree from XML fragment strings.

    All fragments are placed under a single synthetic root element.

    :param items: Strings to parse into XML fragments.
    :returns: An XML document as an element tree.
    """

    def parse_with(dtd_path: Path) -> ET._Element:
        return _elements_from_strings(dtd_path, items)

    return with_entities(parse_with)
104
+
105
+
106
def elements_from_string(content: str) -> ET._Element:
    """
    Creates a Confluence Storage Format XML document tree from an XML string.

    :param content: String to parse into XML.
    :returns: An XML document as an element tree.
    """

    # a single string is handled as a list of fragments with one item
    fragments = [content]
    return elements_from_strings(fragments)
115
+
116
+
117
def _content_to_string(dtd_path: Path, content: str) -> str:
    "Parses content into an element tree and serializes it as pretty-printed XML text."

    document = _elements_from_strings(dtd_path, [content])
    serialized = ET.tostring(document, pretty_print=True)
    return serialized.decode("utf-8")
120
+
121
+
122
def content_to_string(content: str) -> str:
    """
    Converts a Confluence Storage Format document returned by the Confluence REST API into a readable XML document.

    This function
    * adds an XML declaration,
    * wraps the content in a root element,
    * adds namespace declarations associated with Confluence documents.

    :param content: Confluence Storage Format content as a string.
    :returns: XML as a string.
    """

    def render_with(dtd_path: Path) -> str:
        return _content_to_string(dtd_path, content)

    return with_entities(render_with)
136
+
137
+
138
def elements_to_string(root: ET._Element) -> str:
    """
    Converts a Confluence Storage Format element tree into an XML string to push to Confluence REST API.

    The serialized document is expected to be wrapped in a `root` element that carries attributes;
    only the text between the opening and closing `root` tags is returned.

    :param root: Synthesized XML element tree of a Confluence Storage Format document.
    :returns: XML as a string.
    :raises ValueError: The serialized document is not enclosed in the expected `root` element.
    """

    serialized = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")

    # capture everything between the opening and closing tags of the wrapper element
    wrapper = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", serialized, re.DOTALL)
    if wrapper is None:
        raise ValueError("expected: Confluence content")
    return wrapper.group(1)
md2conf/local.py CHANGED
@@ -83,7 +83,7 @@ class LocalProcessor(Processor):
83
83
  os.makedirs(csf_dir, exist_ok=True)
84
84
  with open(csf_path, "w", encoding="utf-8") as f:
85
85
  f.write(content)
86
- for name, data in document.embedded_images.items():
86
+ for name, data in document.embedded_files.items():
87
87
  with open(csf_dir / name, "wb") as f:
88
88
  f.write(data)
89
89
 
md2conf/markdown.py CHANGED
@@ -28,18 +28,19 @@ def _emoji_generator(
28
28
  """
29
29
 
30
30
  name = (alias or shortname).strip(":")
31
- span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
31
+ emoji = xml.etree.ElementTree.Element("x-emoji", {"data-shortname": name})
32
32
  if uc is not None:
33
- span.attrib["data-emoji-unicode"] = uc
33
+ emoji.attrib["data-unicode"] = uc
34
34
 
35
35
  # convert series of Unicode code point hexadecimal values into characters
36
- span.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
36
+ emoji.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
37
37
  else:
38
- span.text = alt
39
- return span
38
+ emoji.text = alt
40
39
 
40
+ return emoji
41
41
 
42
- def _math_formatter(
42
+
43
+ def _verbatim_formatter(
43
44
  source: str,
44
45
  language: str,
45
46
  css_class: str,
@@ -51,7 +52,9 @@ def _math_formatter(
51
52
  **kwargs: Any,
52
53
  ) -> str:
53
54
  """
54
- Custom formatter for language `math` in `pymdownx.superfences`.
55
+ Custom formatter for `pymdownx.superfences`.
56
+
57
+ Used by language `math` (a.k.a. `pymdownx.arithmatex`) and pseudo-language `csf` (Confluence Storage Format pass-through).
55
58
  """
56
59
 
57
60
  if classes is None:
@@ -83,13 +86,16 @@ _CONVERTER = markdown.Markdown(
83
86
  extension_configs={
84
87
  "footnotes": {"BACKLINK_TITLE": ""},
85
88
  "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
86
- "pymdownx.emoji": {
87
- "emoji_generator": _emoji_generator,
88
- },
89
+ "pymdownx.emoji": {"emoji_generator": _emoji_generator, "strict": True},
89
90
  "pymdownx.highlight": {
90
91
  "use_pygments": False,
91
92
  },
92
- "pymdownx.superfences": {"custom_fences": [{"name": "math", "class": "arithmatex", "format": _math_formatter}]},
93
+ "pymdownx.superfences": {
94
+ "custom_fences": [
95
+ {"name": "math", "class": "arithmatex", "format": _verbatim_formatter},
96
+ {"name": "csf", "class": "csf", "format": _verbatim_formatter},
97
+ ]
98
+ },
93
99
  },
94
100
  )
95
101
 
md2conf/toc.py ADDED
@@ -0,0 +1,89 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+
12
+
13
@dataclass(eq=True)
class TableOfContentsEntry:
    """
    Represents a table of contents entry.

    :param level: The heading level assigned to the entry. Each entry can only contain children whose level is strictly greater than that of its parent.
    :param text: The heading text.
    :param children: Direct descendants whose parent is this entry.
    """

    level: int
    text: str
    children: list["TableOfContentsEntry"]

    def __init__(self, level: int, text: str, children: Optional[list["TableOfContentsEntry"]] = None) -> None:
        self.level = level
        self.text = text
        # a missing (or empty) child list is substituted with a fresh list owned by this entry
        self.children = children if children else []
31
+
32
+
33
class TableOfContentsBuilder:
    """
    Builds a table of contents from Markdown headings.

    Headings are added in document order; the builder tracks the chain of currently open
    headings in a stack whose bottom item is a synthetic root entry at level 0.
    """

    _root: TableOfContentsEntry
    _stack: list[TableOfContentsEntry]

    def __init__(self) -> None:
        root = TableOfContentsEntry(0, "<root>")
        self._root = root
        self._stack = [root]

    @property
    def tree(self) -> list[TableOfContentsEntry]:
        """
        Table of contents as a hierarchy of headings.
        """

        return self._root.children

    def add(self, level: int, text: str) -> None:
        """
        Adds a heading to the table of contents.

        :param level: Markdown heading level (e.g. `1` for first-level heading).
        :param text: Markdown heading text.
        :raises ValueError: The heading level is less than 1.
        """

        if level < 1:
            raise ValueError("expected: Markdown heading level >= 1")

        # close all open headings at the same or a deeper level;
        # the synthetic root has level 0 and hence is never removed
        while self._stack[-1].level >= level:
            self._stack.pop()
        parent = self._stack[-1]

        # attach the new heading to the innermost heading that is still open
        entry = TableOfContentsEntry(level, text)
        parent.children.append(entry)

        # the new heading becomes the innermost open heading
        self._stack.append(entry)

    def get_title(self) -> Optional[str]:
        """
        Returns a proposed document title.

        The proposed title is text of the top-level heading if and only if that heading is unique.

        :returns: Title text, or `None` if no title can be inferred.
        """

        headings = self.tree
        return headings[0].text if len(headings) == 1 else None
md2conf/uri.py ADDED
@@ -0,0 +1,46 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import hashlib
10
+ import urllib.parse
11
+ import uuid
12
+ from urllib.parse import urlparse
13
+
14
+
15
def to_data_uri(mime: str, data: str) -> str:
    """
    Generates a data URI with the specified MIME type.

    :param mime: MIME type to embed in the URI.
    :param data: Text payload; characters outside the safe set are percent-encoded.
    :returns: A string of the form `data:<mime>,<encoded payload>`.
    """

    # minimal encoding: these characters are kept verbatim
    unescaped = ";/?:@&=+$,-_.!~*'()#"
    payload = urllib.parse.quote(data, safe=unescaped)
    return "data:" + f"{mime},{payload}"
21
+
22
+
23
def to_uuid(data: str) -> uuid.UUID:
    """
    Generates a UUID that represents the data.

    The same input always yields the same UUID. The identifier is taken verbatim from a SHA-1 digest;
    no version is passed to the `uuid.UUID` constructor.

    :param data: Text to derive the identifier from; encoded as UTF-8 before hashing.
    :returns: A UUID built from the first 16 bytes of the SHA-1 digest of the data.
    """

    # create SHA-1 hash of the data; the hash serves as a content fingerprint, not as a security measure,
    # which is declared explicitly so that the call is not rejected on restricted (e.g. FIPS) builds
    sha1_hash = hashlib.sha1(data.encode("utf-8"), usedforsecurity=False).digest()

    # generate UUID using the first 16 bytes of the hash
    return uuid.UUID(bytes=sha1_hash[:16])
31
+
32
+
33
def to_uuid_urn(data: str) -> str:
    """
    Generates a UUID URN that represents the data.

    :param data: Text to derive the identifier from.
    :returns: A string of the form `urn:uuid:<uuid>`.
    """

    identifier = to_uuid(data)
    return identifier.urn
37
+
38
+
39
def is_absolute_url(url: str) -> bool:
    """
    Checks whether a URL has a scheme or a network location.

    :param url: URL string to inspect.
    :returns: True if the parsed URL has a non-empty scheme or a non-empty network location.
    """

    parts = urlparse(url)
    return bool(parts.scheme or parts.netloc)
42
+
43
+
44
def is_relative_url(url: str) -> bool:
    """
    Checks whether a URL has neither a scheme nor a network location.

    :param url: URL string to inspect.
    :returns: True if both the scheme and the network location of the parsed URL are empty.
    """

    parts = urlparse(url)
    return not (parts.scheme or parts.netloc)
md2conf/xml.py CHANGED
@@ -1,11 +1,21 @@
1
- from typing import Iterable, Optional, Union
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from typing import Iterable, Optional
2
10
 
3
11
  import lxml.etree as ET
4
12
 
5
13
 
6
- def _attrs_equal_excluding(attrs1: ET._Attrib, attrs2: ET._Attrib, exclude: set[Union[str, ET.QName]]) -> bool:
14
+ def _attrs_equal_excluding(attrs1: ET._Attrib, attrs2: ET._Attrib, exclude: set[str]) -> bool:
7
15
  """
8
16
  Compares two dictionary objects, excluding keys in the skip set.
17
+
18
+ :param exclude: Attributes to exclude, in `{namespace}name` notation.
9
19
  """
10
20
 
11
21
  # create key sets to compare, excluding keys to be skipped
@@ -23,10 +33,19 @@ def _attrs_equal_excluding(attrs1: ET._Attrib, attrs2: ET._Attrib, exclude: set[
23
33
 
24
34
 
25
35
  class ElementComparator:
26
- skip_attributes: set[Union[str, ET.QName]]
36
+ skip_attributes: set[str]
37
+ skip_elements: set[str]
38
+
39
+ def __init__(self, *, skip_attributes: Optional[Iterable[str]] = None, skip_elements: Optional[Iterable[str]] = None):
40
+ """
41
+ Initializes a new element tree comparator.
42
+
43
+ :param skip_attributes: Attributes to exclude, in `{namespace}name` notation.
44
+ :param skip_elements: Elements to exclude, in `{namespace}name` notation.
45
+ """
27
46
 
28
- def __init__(self, *, skip_attributes: Optional[Iterable[Union[str, ET.QName]]] = None):
29
47
  self.skip_attributes = set(skip_attributes) if skip_attributes else set()
48
+ self.skip_elements = set(skip_elements) if skip_elements else set()
30
49
 
31
50
  def is_equal(self, e1: ET._Element, e2: ET._Element) -> bool:
32
51
  """
@@ -36,35 +55,49 @@ class ElementComparator:
36
55
  if e1.tag != e2.tag:
37
56
  return False
38
57
 
39
- e1_text = e1.text.strip() if e1.text else ""
40
- e2_text = e2.text.strip() if e2.text else ""
41
- if e1_text != e2_text:
42
- return False
43
-
58
+ # compare tail first, which is outside of element
44
59
  e1_tail = e1.tail.strip() if e1.tail else ""
45
60
  e2_tail = e2.tail.strip() if e2.tail else ""
46
61
  if e1_tail != e2_tail:
47
62
  return False
48
63
 
64
+ # skip element (and content) if on ignore list
65
+ if e1.tag in self.skip_elements:
66
+ return True
67
+
68
+ # compare text second, which is encapsulated by element
69
+ e1_text = e1.text.strip() if e1.text else ""
70
+ e2_text = e2.text.strip() if e2.text else ""
71
+ if e1_text != e2_text:
72
+ return False
73
+
74
+ # compare attributes, disregarding definition order
49
75
  if not _attrs_equal_excluding(e1.attrib, e2.attrib, self.skip_attributes):
50
76
  return False
77
+
78
+ # compare children recursively
51
79
  if len(e1) != len(e2):
52
80
  return False
53
81
  return all(self.is_equal(c1, c2) for c1, c2 in zip(e1, e2))
54
82
 
55
83
 
56
84
  def is_xml_equal(
57
- tree1: ET._Element,
58
- tree2: ET._Element,
59
- *,
60
- skip_attributes: Optional[Iterable[Union[str, ET.QName]]] = None,
85
+ tree1: ET._Element, tree2: ET._Element, *, skip_attributes: Optional[Iterable[str]] = None, skip_elements: Optional[Iterable[str]] = None
61
86
  ) -> bool:
62
87
  """
63
88
  Compare two XML documents for equivalence, ignoring leading/trailing whitespace differences and attribute definition order.
64
89
 
65
90
  :param tree1: XML document as an element tree.
66
91
  :param tree2: XML document as an element tree.
92
+ :param skip_attributes: Attributes to exclude, in `{namespace}name` notation.
93
+ :param skip_elements: Elements to exclude, in `{namespace}name` notation.
67
94
  :returns: True if equivalent, False otherwise.
68
95
  """
69
96
 
70
- return ElementComparator(skip_attributes=skip_attributes).is_equal(tree1, tree2)
97
+ return ElementComparator(skip_attributes=skip_attributes, skip_elements=skip_elements).is_equal(tree1, tree2)
98
+
99
+
100
def element_to_text(node: ET._Element) -> str:
    "Returns all text contained in an element as a concatenated string, with leading and trailing whitespace removed."

    pieces = node.itertext()
    combined = "".join(pieces)
    return combined.strip()
md2conf/emoji.py DELETED
@@ -1,83 +0,0 @@
1
- """
2
- Publish Markdown files to Confluence wiki.
3
-
4
- Copyright 2022-2025, Levente Hunyadi
5
-
6
- :see: https://github.com/hunyadi/md2conf
7
- """
8
-
9
- import pathlib
10
-
11
- import pymdownx.emoji1_db as emoji_db
12
-
13
- EMOJI_PAGE_ID = "13500452"
14
-
15
-
16
- def to_html(cp: int) -> str:
17
- """
18
- Returns the safe HTML representation for a Unicode code point.
19
-
20
- Converts non-ASCII and non-printable characters into HTML entities with decimal notation.
21
-
22
- :param cp: Unicode code point.
23
- :returns: An HTML representation of the Unicode character.
24
- """
25
-
26
- ch = chr(cp)
27
- if ch.isascii() and ch.isalnum():
28
- return ch
29
- else:
30
- return f"&#{cp};"
31
-
32
-
33
- def generate_source(path: pathlib.Path) -> None:
34
- "Generates a source Markdown document for testing emojis."
35
-
36
- emojis = emoji_db.emoji
37
-
38
- with open(path, "w") as f:
39
- print(f"<!-- confluence-page-id: {EMOJI_PAGE_ID} -->", file=f)
40
- print("<!-- This file has been generated by a script. -->", file=f)
41
- print(file=f)
42
- print("## Emoji", file=f)
43
- print(file=f)
44
- print("| Icon | Emoji code |", file=f)
45
- print("| ---- | ---------- |", file=f)
46
- for key in emojis.keys():
47
- key = key.strip(":")
48
- print(f"| :{key}: | `:{key}:` |", file=f)
49
-
50
-
51
- def generate_target(path: pathlib.Path) -> None:
52
- "Generates a target Confluence Storage Format (XML) document for testing emojis."
53
-
54
- emojis = emoji_db.emoji
55
-
56
- with open(path, "w") as f:
57
- print('<ac:structured-macro ac:name="info" ac:schema-version="1">', file=f)
58
- print("<ac:rich-text-body>", file=f)
59
- print("<p>This page has been generated with a tool.</p>", file=f)
60
- print("</ac:rich-text-body>", file=f)
61
- print("</ac:structured-macro>", file=f)
62
- print("<h2>Emoji</h2>", file=f)
63
- print("<table>", file=f)
64
- print("<thead><tr><th>Icon</th><th>Emoji code</th></tr></thead>", file=f)
65
- print("<tbody>", file=f)
66
- for key, data in emojis.items():
67
- unicode = data["unicode"]
68
- key = key.strip(":")
69
- html = "".join(to_html(int(item, base=16)) for item in unicode.split("-"))
70
-
71
- print(
72
- f"<tr>\n"
73
- f" <td>\n"
74
- f' <ac:emoticon ac:name="{key}" ac:emoji-shortname=":{key}:" ac:emoji-id="{unicode}" ac:emoji-fallback="{html}"/>\n'
75
- f" </td>\n"
76
- f" <td>\n"
77
- f" <code>:{key}:</code>\n"
78
- f" </td>\n"
79
- f"</tr>",
80
- file=f,
81
- )
82
- print("</tbody>", file=f)
83
- print("</table>", file=f)