markdown-to-confluence 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/METADATA +104 -25
- markdown_to_confluence-0.4.5.dist-info/RECORD +33 -0
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/licenses/LICENSE +1 -1
- md2conf/__init__.py +1 -1
- md2conf/__main__.py +19 -4
- md2conf/api.py +9 -1
- md2conf/application.py +16 -8
- md2conf/converter.py +835 -579
- md2conf/csf.py +217 -0
- md2conf/domain.py +2 -0
- md2conf/drawio.py +18 -14
- md2conf/latex.py +245 -0
- md2conf/local.py +2 -2
- md2conf/markdown.py +19 -11
- md2conf/mermaid.py +21 -27
- md2conf/text.py +54 -0
- md2conf/toc.py +89 -0
- md2conf/uri.py +46 -0
- md2conf/xml.py +84 -14
- markdown_to_confluence-0.4.3.dist-info/RECORD +0 -29
- md2conf/emoji.py +0 -83
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/WHEEL +0 -0
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.4.3.dist-info → markdown_to_confluence-0.4.5.dist-info}/zip-safe +0 -0
md2conf/converter.py
CHANGED
|
@@ -6,60 +6,59 @@ Copyright 2022-2025, Levente Hunyadi
|
|
|
6
6
|
:see: https://github.com/hunyadi/md2conf
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
# mypy: disable-error-code="dict-item"
|
|
10
|
-
|
|
11
9
|
import dataclasses
|
|
10
|
+
import enum
|
|
12
11
|
import hashlib
|
|
13
|
-
import importlib.resources as resources
|
|
14
12
|
import logging
|
|
15
13
|
import os.path
|
|
16
14
|
import re
|
|
17
15
|
import uuid
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
18
17
|
from dataclasses import dataclass
|
|
19
18
|
from pathlib import Path
|
|
20
|
-
from typing import
|
|
21
|
-
from urllib.parse import ParseResult, quote_plus, urlparse
|
|
19
|
+
from typing import ClassVar, Literal, Optional, Union
|
|
20
|
+
from urllib.parse import ParseResult, quote_plus, urlparse
|
|
22
21
|
|
|
23
22
|
import lxml.etree as ET
|
|
24
|
-
from lxml.builder import ElementMaker
|
|
25
23
|
from strong_typing.core import JsonType
|
|
26
24
|
|
|
27
25
|
from . import drawio, mermaid
|
|
28
26
|
from .collection import ConfluencePageCollection
|
|
27
|
+
from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string, normalize_inline
|
|
29
28
|
from .domain import ConfluenceDocumentOptions, ConfluencePageID
|
|
30
|
-
from .extra import path_relative_to
|
|
29
|
+
from .extra import override, path_relative_to
|
|
30
|
+
from .latex import get_png_dimensions, remove_png_chunks, render_latex
|
|
31
31
|
from .markdown import markdown_to_html
|
|
32
32
|
from .metadata import ConfluenceSiteMetadata
|
|
33
33
|
from .properties import PageError
|
|
34
34
|
from .scanner import ScannedDocument, Scanner
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"ri": "http://atlassian.com/resource/identifier",
|
|
39
|
-
}
|
|
40
|
-
for key, value in namespaces.items():
|
|
41
|
-
ET.register_namespace(key, value)
|
|
35
|
+
from .toc import TableOfContentsBuilder
|
|
36
|
+
from .uri import is_absolute_url, to_uuid_urn
|
|
37
|
+
from .xml import element_to_text
|
|
42
38
|
|
|
43
39
|
|
|
44
|
-
def get_volatile_attributes() -> list[
|
|
40
|
+
def get_volatile_attributes() -> list[str]:
|
|
45
41
|
"Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
|
|
46
42
|
|
|
47
43
|
return [
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
44
|
+
AC_ATTR("local-id"),
|
|
45
|
+
AC_ATTR("macro-id"),
|
|
46
|
+
RI_ATTR("version-at-save"),
|
|
51
47
|
]
|
|
52
48
|
|
|
53
49
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
RI = ElementMaker(namespace=namespaces["ri"])
|
|
50
|
+
def get_volatile_elements() -> list[str]:
|
|
51
|
+
"Returns a list of volatile elements whose content frequently changes as a Confluence storage format XHTML document is updated."
|
|
57
52
|
|
|
58
|
-
|
|
53
|
+
return [AC_ATTR("task-uuid")]
|
|
59
54
|
|
|
60
55
|
|
|
61
|
-
|
|
62
|
-
|
|
56
|
+
status_images: dict[str, str] = {
|
|
57
|
+
to_uuid_urn(f'<svg height="10" width="10" xmlns="http://www.w3.org/2000/svg"><circle r="5" cx="5" cy="5" fill="{color}" /></svg>'): color
|
|
58
|
+
for color in ["gray", "purple", "blue", "red", "yellow", "green"]
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
LOGGER = logging.getLogger(__name__)
|
|
63
62
|
|
|
64
63
|
|
|
65
64
|
def starts_with_any(text: str, prefixes: list[str]) -> bool:
|
|
@@ -71,16 +70,6 @@ def starts_with_any(text: str, prefixes: list[str]) -> bool:
|
|
|
71
70
|
return False
|
|
72
71
|
|
|
73
72
|
|
|
74
|
-
def is_absolute_url(url: str) -> bool:
|
|
75
|
-
urlparts = urlparse(url)
|
|
76
|
-
return bool(urlparts.scheme) or bool(urlparts.netloc)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def is_relative_url(url: str) -> bool:
|
|
80
|
-
urlparts = urlparse(url)
|
|
81
|
-
return not bool(urlparts.scheme) and not bool(urlparts.netloc)
|
|
82
|
-
|
|
83
|
-
|
|
84
73
|
def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
|
|
85
74
|
"True if the absolute path is nested within the base path."
|
|
86
75
|
|
|
@@ -100,132 +89,94 @@ def encode_title(text: str) -> str:
|
|
|
100
89
|
return quote_plus(text.strip())
|
|
101
90
|
|
|
102
91
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
:
|
|
108
|
-
:
|
|
109
|
-
:
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
92
|
+
# supported code block languages, for which syntax highlighting is available
|
|
93
|
+
_LANGUAGES = {
|
|
94
|
+
"abap": "abap",
|
|
95
|
+
"actionscript3": "actionscript3",
|
|
96
|
+
"ada": "ada",
|
|
97
|
+
"applescript": "applescript",
|
|
98
|
+
"arduino": "arduino",
|
|
99
|
+
"autoit": "autoit",
|
|
100
|
+
"bash": "bash",
|
|
101
|
+
"c": "c",
|
|
102
|
+
"c#": "c#",
|
|
103
|
+
"clojure": "clojure",
|
|
104
|
+
"coffeescript": "coffeescript",
|
|
105
|
+
"coldfusion": "coldfusion",
|
|
106
|
+
"cpp": "cpp",
|
|
107
|
+
"csharp": "c#",
|
|
108
|
+
"css": "css",
|
|
109
|
+
"cuda": "cuda",
|
|
110
|
+
"d": "d",
|
|
111
|
+
"dart": "dart",
|
|
112
|
+
"delphi": "delphi",
|
|
113
|
+
"diff": "diff",
|
|
114
|
+
"elixir": "elixir",
|
|
115
|
+
"erl": "erl",
|
|
116
|
+
"erlang": "erl",
|
|
117
|
+
"fortran": "fortran",
|
|
118
|
+
"foxpro": "foxpro",
|
|
119
|
+
"go": "go",
|
|
120
|
+
"graphql": "graphql",
|
|
121
|
+
"groovy": "groovy",
|
|
122
|
+
"haskell": "haskell",
|
|
123
|
+
"haxe": "haxe",
|
|
124
|
+
"html": "html",
|
|
125
|
+
"java": "java",
|
|
126
|
+
"javafx": "javafx",
|
|
127
|
+
"javascript": "js",
|
|
128
|
+
"js": "js",
|
|
129
|
+
"json": "json",
|
|
130
|
+
"jsx": "jsx",
|
|
131
|
+
"julia": "julia",
|
|
132
|
+
"kotlin": "kotlin",
|
|
133
|
+
"livescript": "livescript",
|
|
134
|
+
"lua": "lua",
|
|
135
|
+
"mermaid": "mermaid",
|
|
136
|
+
"mathematica": "mathematica",
|
|
137
|
+
"matlab": "matlab",
|
|
138
|
+
"objectivec": "objectivec",
|
|
139
|
+
"objectivej": "objectivej",
|
|
140
|
+
"ocaml": "ocaml",
|
|
141
|
+
"octave": "octave",
|
|
142
|
+
"pascal": "pascal",
|
|
143
|
+
"perl": "perl",
|
|
144
|
+
"php": "php",
|
|
145
|
+
"powershell": "powershell",
|
|
146
|
+
"prolog": "prolog",
|
|
147
|
+
"puppet": "puppet",
|
|
148
|
+
"py": "py",
|
|
149
|
+
"python": "py",
|
|
150
|
+
"qml": "qml",
|
|
151
|
+
"r": "r",
|
|
152
|
+
"racket": "racket",
|
|
153
|
+
"rst": "rst",
|
|
154
|
+
"ruby": "ruby",
|
|
155
|
+
"rust": "rust",
|
|
156
|
+
"sass": "sass",
|
|
157
|
+
"scala": "scala",
|
|
158
|
+
"scheme": "scheme",
|
|
159
|
+
"shell": "shell",
|
|
160
|
+
"smalltalk": "smalltalk",
|
|
161
|
+
"splunk": "splunk",
|
|
162
|
+
"sql": "sql",
|
|
163
|
+
"standardml": "standardml",
|
|
164
|
+
"swift": "swift",
|
|
165
|
+
"tcl": "tcl",
|
|
166
|
+
"tex": "tex",
|
|
167
|
+
"tsx": "tsx",
|
|
168
|
+
"typescript": "typescript",
|
|
169
|
+
"vala": "vala",
|
|
170
|
+
"vb": "vb",
|
|
171
|
+
"verilog": "verilog",
|
|
172
|
+
"vhdl": "vhdl",
|
|
173
|
+
"xml": "xml",
|
|
174
|
+
"xquery": "xquery",
|
|
175
|
+
"yaml": "yaml",
|
|
176
|
+
}
|
|
118
177
|
|
|
119
|
-
ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
|
|
120
178
|
|
|
121
|
-
|
|
122
|
-
'<?xml version="1.0"?>',
|
|
123
|
-
f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
|
|
124
|
-
]
|
|
125
|
-
data.extend(items)
|
|
126
|
-
data.append("</root>")
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
return ET.fromstringlist(data, parser=parser)
|
|
130
|
-
except ET.XMLSyntaxError as ex:
|
|
131
|
-
raise ParseError() from ex
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def elements_from_strings(items: list[str]) -> ET._Element:
|
|
135
|
-
"Creates a fragment of several XML nodes from their string representation wrapped in a root element."
|
|
136
|
-
|
|
137
|
-
resource_path = resources.files(__package__).joinpath("entities.dtd")
|
|
138
|
-
with resources.as_file(resource_path) as dtd_path:
|
|
139
|
-
return _elements_from_strings(dtd_path, items)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def elements_from_string(content: str) -> ET._Element:
|
|
143
|
-
return elements_from_strings([content])
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
_languages = [
|
|
147
|
-
"abap",
|
|
148
|
-
"actionscript3",
|
|
149
|
-
"ada",
|
|
150
|
-
"applescript",
|
|
151
|
-
"arduino",
|
|
152
|
-
"autoit",
|
|
153
|
-
"bash",
|
|
154
|
-
"c",
|
|
155
|
-
"clojure",
|
|
156
|
-
"coffeescript",
|
|
157
|
-
"coldfusion",
|
|
158
|
-
"cpp",
|
|
159
|
-
"csharp",
|
|
160
|
-
"css",
|
|
161
|
-
"cuda",
|
|
162
|
-
"d",
|
|
163
|
-
"dart",
|
|
164
|
-
"delphi",
|
|
165
|
-
"diff",
|
|
166
|
-
"elixir",
|
|
167
|
-
"erlang",
|
|
168
|
-
"fortran",
|
|
169
|
-
"foxpro",
|
|
170
|
-
"go",
|
|
171
|
-
"graphql",
|
|
172
|
-
"groovy",
|
|
173
|
-
"haskell",
|
|
174
|
-
"haxe",
|
|
175
|
-
"html",
|
|
176
|
-
"java",
|
|
177
|
-
"javafx",
|
|
178
|
-
"javascript",
|
|
179
|
-
"json",
|
|
180
|
-
"jsx",
|
|
181
|
-
"julia",
|
|
182
|
-
"kotlin",
|
|
183
|
-
"livescript",
|
|
184
|
-
"lua",
|
|
185
|
-
"mermaid",
|
|
186
|
-
"mathematica",
|
|
187
|
-
"matlab",
|
|
188
|
-
"objectivec",
|
|
189
|
-
"objectivej",
|
|
190
|
-
"ocaml",
|
|
191
|
-
"octave",
|
|
192
|
-
"pascal",
|
|
193
|
-
"perl",
|
|
194
|
-
"php",
|
|
195
|
-
"powershell",
|
|
196
|
-
"prolog",
|
|
197
|
-
"puppet",
|
|
198
|
-
"python",
|
|
199
|
-
"qml",
|
|
200
|
-
"r",
|
|
201
|
-
"racket",
|
|
202
|
-
"rst",
|
|
203
|
-
"ruby",
|
|
204
|
-
"rust",
|
|
205
|
-
"sass",
|
|
206
|
-
"scala",
|
|
207
|
-
"scheme",
|
|
208
|
-
"shell",
|
|
209
|
-
"smalltalk",
|
|
210
|
-
"splunk",
|
|
211
|
-
"sql",
|
|
212
|
-
"standardml",
|
|
213
|
-
"swift",
|
|
214
|
-
"tcl",
|
|
215
|
-
"tex",
|
|
216
|
-
"tsx",
|
|
217
|
-
"typescript",
|
|
218
|
-
"vala",
|
|
219
|
-
"vb",
|
|
220
|
-
"verilog",
|
|
221
|
-
"vhdl",
|
|
222
|
-
"xml",
|
|
223
|
-
"xquery",
|
|
224
|
-
"yaml",
|
|
225
|
-
]
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
class NodeVisitor:
|
|
179
|
+
class NodeVisitor(ABC):
|
|
229
180
|
def visit(self, node: ET._Element) -> None:
|
|
230
181
|
"Recursively visits all descendants of this node."
|
|
231
182
|
|
|
@@ -236,12 +187,17 @@ class NodeVisitor:
|
|
|
236
187
|
source = node[index]
|
|
237
188
|
target = self.transform(source)
|
|
238
189
|
if target is not None:
|
|
190
|
+
# chain sibling text node that immediately follows original element
|
|
191
|
+
target.tail = source.tail
|
|
192
|
+
source.tail = None
|
|
193
|
+
|
|
194
|
+
# replace original element with transformed element
|
|
239
195
|
node[index] = target
|
|
240
196
|
else:
|
|
241
197
|
self.visit(source)
|
|
242
198
|
|
|
243
|
-
|
|
244
|
-
|
|
199
|
+
@abstractmethod
|
|
200
|
+
def transform(self, child: ET._Element) -> Optional[ET._Element]: ...
|
|
245
201
|
|
|
246
202
|
|
|
247
203
|
def title_to_identifier(title: str) -> str:
|
|
@@ -253,58 +209,107 @@ def title_to_identifier(title: str) -> str:
|
|
|
253
209
|
return s
|
|
254
210
|
|
|
255
211
|
|
|
256
|
-
def
|
|
257
|
-
"
|
|
212
|
+
def element_text_starts_with_any(node: ET._Element, prefixes: list[str]) -> bool:
|
|
213
|
+
"True if the text contained in an element starts with any of the specified prefix strings."
|
|
258
214
|
|
|
259
|
-
|
|
215
|
+
if node.text is None:
|
|
216
|
+
return False
|
|
217
|
+
return starts_with_any(node.text, prefixes)
|
|
260
218
|
|
|
261
219
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
width: Optional[str]
|
|
266
|
-
height: Optional[str]
|
|
220
|
+
def is_placeholder_for(node: ET._Element, name: str) -> bool:
|
|
221
|
+
"""
|
|
222
|
+
Identifies a Confluence widget placeholder, e.g. `[[_TOC_]]` or `[[_LISTING_]]`.
|
|
267
223
|
|
|
224
|
+
:param node: The element to check.
|
|
225
|
+
:param name: The placeholder name.
|
|
226
|
+
"""
|
|
268
227
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
text: str
|
|
228
|
+
# `[[_TOC_]]` is represented in HTML as <p>[[<em>TOC</em>]]</p>
|
|
229
|
+
if node.text != "[[" or len(node) != 1:
|
|
230
|
+
return False
|
|
273
231
|
|
|
232
|
+
child = node[0]
|
|
233
|
+
if child.tag != "em" or child.text != name or child.tail != "]]":
|
|
234
|
+
return False
|
|
274
235
|
|
|
275
|
-
|
|
276
|
-
"Builds a table of contents from Markdown headings."
|
|
236
|
+
return True
|
|
277
237
|
|
|
278
|
-
headings: list[TableOfContentsEntry]
|
|
279
238
|
|
|
280
|
-
|
|
281
|
-
|
|
239
|
+
@enum.unique
|
|
240
|
+
class FormattingContext(enum.Enum):
|
|
241
|
+
"Identifies the formatting context for the element."
|
|
282
242
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
Adds a heading to the table of contents.
|
|
243
|
+
BLOCK = "block"
|
|
244
|
+
INLINE = "inline"
|
|
286
245
|
|
|
287
|
-
:param level: Markdown heading level (e.g. `1` for first-level heading).
|
|
288
|
-
:param text: Markdown heading text.
|
|
289
|
-
"""
|
|
290
246
|
|
|
291
|
-
|
|
247
|
+
@dataclass
|
|
248
|
+
class ImageAttributes:
|
|
249
|
+
"""
|
|
250
|
+
Attributes applied to an `<img>` element.
|
|
251
|
+
|
|
252
|
+
:param context: Identifies the formatting context for the element (block or inline).
|
|
253
|
+
:param width: Natural image width in pixels.
|
|
254
|
+
:param height: Natural image height in pixels.
|
|
255
|
+
:param alt: Alternate text.
|
|
256
|
+
:param title: Title text (a.k.a. image tooltip).
|
|
257
|
+
:param caption: Caption text (shown below figure).
|
|
258
|
+
"""
|
|
292
259
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
260
|
+
context: FormattingContext
|
|
261
|
+
width: Optional[int]
|
|
262
|
+
height: Optional[int]
|
|
263
|
+
alt: Optional[str]
|
|
264
|
+
title: Optional[str]
|
|
265
|
+
caption: Optional[str]
|
|
296
266
|
|
|
297
|
-
|
|
298
|
-
|
|
267
|
+
def __post_init__(self) -> None:
|
|
268
|
+
if self.caption is None and self.context is FormattingContext.BLOCK:
|
|
269
|
+
self.caption = self.title or self.alt
|
|
270
|
+
|
|
271
|
+
def as_dict(self) -> dict[str, str]:
|
|
272
|
+
attributes: dict[str, str] = {}
|
|
273
|
+
if self.context is FormattingContext.BLOCK:
|
|
274
|
+
attributes[AC_ATTR("align")] = "center"
|
|
275
|
+
attributes[AC_ATTR("layout")] = "center"
|
|
276
|
+
if self.width is not None:
|
|
277
|
+
attributes[AC_ATTR("original-width")] = str(self.width)
|
|
278
|
+
if self.height is not None:
|
|
279
|
+
attributes[AC_ATTR("original-height")] = str(self.height)
|
|
280
|
+
if self.width is not None:
|
|
281
|
+
attributes[AC_ATTR("custom-width")] = "true"
|
|
282
|
+
attributes[AC_ATTR("width")] = str(self.width)
|
|
283
|
+
|
|
284
|
+
elif self.context is FormattingContext.INLINE:
|
|
285
|
+
if self.width is not None:
|
|
286
|
+
attributes[AC_ATTR("width")] = str(self.width)
|
|
287
|
+
if self.height is not None:
|
|
288
|
+
attributes[AC_ATTR("height")] = str(self.height)
|
|
289
|
+
else:
|
|
290
|
+
raise NotImplementedError("match not exhaustive for enumeration")
|
|
299
291
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
pass
|
|
292
|
+
if self.alt is not None:
|
|
293
|
+
attributes.update({AC_ATTR("alt"): self.alt})
|
|
294
|
+
if self.title is not None:
|
|
295
|
+
attributes.update({AC_ATTR("title"): self.title})
|
|
296
|
+
return attributes
|
|
306
297
|
|
|
307
|
-
|
|
298
|
+
EMPTY_BLOCK: ClassVar["ImageAttributes"]
|
|
299
|
+
EMPTY_INLINE: ClassVar["ImageAttributes"]
|
|
300
|
+
|
|
301
|
+
@classmethod
|
|
302
|
+
def empty(cls, context: FormattingContext) -> "ImageAttributes":
|
|
303
|
+
if context is FormattingContext.BLOCK:
|
|
304
|
+
return cls.EMPTY_BLOCK
|
|
305
|
+
elif context is FormattingContext.INLINE:
|
|
306
|
+
return cls.EMPTY_INLINE
|
|
307
|
+
else:
|
|
308
|
+
raise NotImplementedError("match not exhaustive for enumeration")
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
ImageAttributes.EMPTY_BLOCK = ImageAttributes(FormattingContext.BLOCK, None, None, None, None, None)
|
|
312
|
+
ImageAttributes.EMPTY_INLINE = ImageAttributes(FormattingContext.INLINE, None, None, None, None, None)
|
|
308
313
|
|
|
309
314
|
|
|
310
315
|
@dataclass
|
|
@@ -319,6 +324,7 @@ class ConfluenceConverterOptions:
|
|
|
319
324
|
:param prefer_raster: Whether to choose PNG files over SVG files when available.
|
|
320
325
|
:param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
|
|
321
326
|
:param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
|
|
327
|
+
:param render_latex: Whether to pre-render LaTeX formulas into PNG/SVG images.
|
|
322
328
|
:param diagram_output_format: Target image format for diagrams.
|
|
323
329
|
:param webui_links: When true, convert relative URLs to Confluence Web UI links.
|
|
324
330
|
"""
|
|
@@ -328,10 +334,23 @@ class ConfluenceConverterOptions:
|
|
|
328
334
|
prefer_raster: bool = True
|
|
329
335
|
render_drawio: bool = False
|
|
330
336
|
render_mermaid: bool = False
|
|
337
|
+
render_latex: bool = False
|
|
331
338
|
diagram_output_format: Literal["png", "svg"] = "png"
|
|
332
339
|
webui_links: bool = False
|
|
333
340
|
|
|
334
341
|
|
|
342
|
+
@dataclass
|
|
343
|
+
class ImageData:
|
|
344
|
+
path: Path
|
|
345
|
+
description: Optional[str] = None
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@dataclass
|
|
349
|
+
class EmbeddedFileData:
|
|
350
|
+
data: bytes
|
|
351
|
+
description: Optional[str] = None
|
|
352
|
+
|
|
353
|
+
|
|
335
354
|
class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
336
355
|
"Transforms a plain HTML tree into Confluence Storage Format."
|
|
337
356
|
|
|
@@ -339,10 +358,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
339
358
|
path: Path
|
|
340
359
|
base_dir: Path
|
|
341
360
|
root_dir: Path
|
|
342
|
-
toc:
|
|
361
|
+
toc: TableOfContentsBuilder
|
|
343
362
|
links: list[str]
|
|
344
|
-
images: list[
|
|
345
|
-
|
|
363
|
+
images: list[ImageData]
|
|
364
|
+
embedded_files: dict[str, EmbeddedFileData]
|
|
346
365
|
site_metadata: ConfluenceSiteMetadata
|
|
347
366
|
page_metadata: ConfluencePageCollection
|
|
348
367
|
|
|
@@ -363,28 +382,40 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
363
382
|
self.path = path
|
|
364
383
|
self.base_dir = path.parent
|
|
365
384
|
self.root_dir = root_dir
|
|
366
|
-
self.toc =
|
|
385
|
+
self.toc = TableOfContentsBuilder()
|
|
367
386
|
self.links = []
|
|
368
387
|
self.images = []
|
|
369
|
-
self.
|
|
388
|
+
self.embedded_files = {}
|
|
370
389
|
self.site_metadata = site_metadata
|
|
371
390
|
self.page_metadata = page_metadata
|
|
372
391
|
|
|
373
392
|
def _transform_heading(self, heading: ET._Element) -> None:
|
|
374
|
-
"
|
|
393
|
+
"""
|
|
394
|
+
Adds anchors to headings in the same document (if *heading anchors* is enabled).
|
|
395
|
+
|
|
396
|
+
Original:
|
|
397
|
+
```
|
|
398
|
+
<h1>Heading text</h1>
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
Transformed:
|
|
402
|
+
```
|
|
403
|
+
<h1><structured-macro name="anchor">...</structured-macro>Heading text</h1>
|
|
404
|
+
```
|
|
405
|
+
"""
|
|
375
406
|
|
|
376
407
|
for e in heading:
|
|
377
408
|
self.visit(e)
|
|
378
409
|
|
|
379
|
-
anchor =
|
|
410
|
+
anchor = AC_ELEM(
|
|
380
411
|
"structured-macro",
|
|
381
412
|
{
|
|
382
|
-
|
|
383
|
-
|
|
413
|
+
AC_ATTR("name"): "anchor",
|
|
414
|
+
AC_ATTR("schema-version"): "1",
|
|
384
415
|
},
|
|
385
|
-
|
|
416
|
+
AC_ELEM(
|
|
386
417
|
"parameter",
|
|
387
|
-
{
|
|
418
|
+
{AC_ATTR("name"): ""},
|
|
388
419
|
title_to_identifier(element_to_text(heading)),
|
|
389
420
|
),
|
|
390
421
|
)
|
|
@@ -395,7 +426,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
395
426
|
heading.text = None
|
|
396
427
|
|
|
397
428
|
def _warn_or_raise(self, msg: str) -> None:
|
|
398
|
-
"Emit a warning or raise an exception when a path points to a resource that doesn't exist."
|
|
429
|
+
"Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
|
|
399
430
|
|
|
400
431
|
if self.options.ignore_invalid_url:
|
|
401
432
|
LOGGER.warning(msg)
|
|
@@ -411,7 +442,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
411
442
|
* Links to documents in the source hierarchy are mapped into full Confluence URLs.
|
|
412
443
|
"""
|
|
413
444
|
|
|
414
|
-
|
|
445
|
+
# Confluence doesn't support `title` attribute on `<a>` elements
|
|
446
|
+
anchor.attrib.pop("title", None)
|
|
447
|
+
|
|
448
|
+
url = anchor.get("href")
|
|
415
449
|
if url is None or is_absolute_url(url):
|
|
416
450
|
return None
|
|
417
451
|
|
|
@@ -419,46 +453,52 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
419
453
|
relative_url: ParseResult = urlparse(url)
|
|
420
454
|
|
|
421
455
|
if not relative_url.scheme and not relative_url.netloc and not relative_url.path and not relative_url.params and not relative_url.query:
|
|
422
|
-
LOGGER.debug("Found
|
|
456
|
+
LOGGER.debug("Found same-page URL: %s", url)
|
|
423
457
|
if self.options.heading_anchors:
|
|
424
458
|
# <ac:link ac:anchor="anchor"><ac:link-body>...</ac:link-body></ac:link>
|
|
425
459
|
target = relative_url.fragment.lstrip("#")
|
|
426
|
-
link_body =
|
|
460
|
+
link_body = AC_ELEM("link-body", {}, *list(anchor))
|
|
427
461
|
link_body.text = anchor.text
|
|
428
|
-
link_wrapper =
|
|
462
|
+
link_wrapper = AC_ELEM(
|
|
429
463
|
"link",
|
|
430
464
|
{
|
|
431
|
-
|
|
465
|
+
AC_ATTR("anchor"): target,
|
|
432
466
|
},
|
|
433
467
|
link_body,
|
|
434
468
|
)
|
|
435
|
-
link_wrapper.tail = anchor.tail
|
|
436
469
|
return link_wrapper
|
|
437
470
|
else:
|
|
438
471
|
return None
|
|
439
472
|
|
|
440
|
-
#
|
|
441
|
-
|
|
442
|
-
|
|
473
|
+
# discard original value: relative links always require transformation
|
|
474
|
+
anchor.attrib.pop("href")
|
|
475
|
+
|
|
476
|
+
# convert the relative URL to absolute path based on the base path value
|
|
443
477
|
absolute_path = (self.base_dir / relative_url.path).resolve()
|
|
478
|
+
|
|
479
|
+
# look up the absolute path in the page metadata dictionary to discover the relative path within Confluence that should be used
|
|
444
480
|
if not is_directory_within(absolute_path, self.root_dir):
|
|
445
|
-
anchor.attrib.pop("href")
|
|
446
481
|
self._warn_or_raise(f"relative URL {url} points to outside root path: {self.root_dir}")
|
|
447
482
|
return None
|
|
448
483
|
|
|
484
|
+
if absolute_path.suffix == ".md":
|
|
485
|
+
return self._transform_page_link(anchor, relative_url, absolute_path)
|
|
486
|
+
else:
|
|
487
|
+
return self._transform_attachment_link(anchor, absolute_path)
|
|
488
|
+
|
|
489
|
+
def _transform_page_link(self, anchor: ET._Element, relative_url: ParseResult, absolute_path: Path) -> Optional[ET._Element]:
|
|
490
|
+
"""
|
|
491
|
+
Transforms links to other Markdown documents (Confluence pages).
|
|
492
|
+
"""
|
|
493
|
+
|
|
449
494
|
link_metadata = self.page_metadata.get(absolute_path)
|
|
450
495
|
if link_metadata is None:
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
LOGGER.warning(msg)
|
|
454
|
-
anchor.attrib.pop("href")
|
|
455
|
-
return None
|
|
456
|
-
else:
|
|
457
|
-
raise DocumentError(msg)
|
|
496
|
+
self._warn_or_raise(f"unable to find matching page for URL: {relative_url.geturl()}")
|
|
497
|
+
return None
|
|
458
498
|
|
|
459
499
|
relative_path = os.path.relpath(absolute_path, self.base_dir)
|
|
460
500
|
LOGGER.debug("Found link to page %s with metadata: %s", relative_path, link_metadata)
|
|
461
|
-
self.links.append(
|
|
501
|
+
self.links.append(relative_url.geturl())
|
|
462
502
|
|
|
463
503
|
if self.options.webui_links:
|
|
464
504
|
page_url = f"{self.site_metadata.base_path}pages/viewpage.action?pageId={link_metadata.page_id}"
|
|
@@ -470,7 +510,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
470
510
|
|
|
471
511
|
page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
|
|
472
512
|
|
|
473
|
-
|
|
513
|
+
transformed_url = ParseResult(
|
|
474
514
|
scheme="https",
|
|
475
515
|
netloc=self.site_metadata.domain,
|
|
476
516
|
path=page_url,
|
|
@@ -478,24 +518,83 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
478
518
|
query="",
|
|
479
519
|
fragment=relative_url.fragment,
|
|
480
520
|
)
|
|
481
|
-
transformed_url = urlunparse(components)
|
|
482
521
|
|
|
483
|
-
LOGGER.debug("Transformed relative URL: %s to URL: %s",
|
|
484
|
-
anchor.
|
|
522
|
+
LOGGER.debug("Transformed relative URL: %s to URL: %s", relative_url.geturl(), transformed_url.geturl())
|
|
523
|
+
anchor.set("href", transformed_url.geturl())
|
|
485
524
|
return None
|
|
486
525
|
|
|
487
|
-
def
|
|
488
|
-
"
|
|
526
|
+
def _transform_attachment_link(self, anchor: ET._Element, absolute_path: Path) -> Optional[ET._Element]:
|
|
527
|
+
"""
|
|
528
|
+
Transforms links to document binaries such as PDF, DOCX or XLSX.
|
|
529
|
+
"""
|
|
530
|
+
|
|
531
|
+
if not absolute_path.exists():
|
|
532
|
+
self._warn_or_raise(f"relative URL points to non-existing file: {absolute_path}")
|
|
533
|
+
return None
|
|
534
|
+
|
|
535
|
+
file_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
|
|
536
|
+
self.images.append(ImageData(absolute_path))
|
|
537
|
+
|
|
538
|
+
link_body = AC_ELEM("link-body", {}, *list(anchor))
|
|
539
|
+
link_body.text = anchor.text
|
|
540
|
+
link_wrapper = AC_ELEM(
|
|
541
|
+
"link",
|
|
542
|
+
{},
|
|
543
|
+
RI_ELEM("attachment", {RI_ATTR("filename"): file_name}),
|
|
544
|
+
link_body,
|
|
545
|
+
)
|
|
546
|
+
return link_wrapper
|
|
547
|
+
|
|
548
|
+
def _transform_status(self, color: str, caption: str) -> ET._Element:
|
|
549
|
+
macro_id = str(uuid.uuid4())
|
|
550
|
+
attributes = {
|
|
551
|
+
AC_ATTR("name"): "status",
|
|
552
|
+
AC_ATTR("schema-version"): "1",
|
|
553
|
+
AC_ATTR("macro-id"): macro_id,
|
|
554
|
+
}
|
|
555
|
+
if color != "gray":
|
|
556
|
+
return AC_ELEM(
|
|
557
|
+
"structured-macro",
|
|
558
|
+
attributes,
|
|
559
|
+
AC_ELEM(
|
|
560
|
+
"parameter",
|
|
561
|
+
{AC_ATTR("name"): "colour"},
|
|
562
|
+
color.title(),
|
|
563
|
+
),
|
|
564
|
+
AC_ELEM(
|
|
565
|
+
"parameter",
|
|
566
|
+
{AC_ATTR("name"): "title"},
|
|
567
|
+
caption,
|
|
568
|
+
),
|
|
569
|
+
)
|
|
570
|
+
else:
|
|
571
|
+
return AC_ELEM(
|
|
572
|
+
"structured-macro",
|
|
573
|
+
attributes,
|
|
574
|
+
AC_ELEM(
|
|
575
|
+
"parameter",
|
|
576
|
+
{AC_ATTR("name"): "title"},
|
|
577
|
+
caption,
|
|
578
|
+
),
|
|
579
|
+
)
|
|
489
580
|
|
|
490
|
-
|
|
581
|
+
def _transform_image(self, context: FormattingContext, image: ET._Element) -> ET._Element:
|
|
582
|
+
"Inserts an attached or external image."
|
|
491
583
|
|
|
584
|
+
src = image.get("src")
|
|
492
585
|
if not src:
|
|
493
586
|
raise DocumentError("image lacks `src` attribute")
|
|
494
587
|
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
588
|
+
alt = image.get("alt")
|
|
589
|
+
if alt is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
|
|
590
|
+
return self._transform_status(color, alt)
|
|
591
|
+
|
|
592
|
+
title = image.get("title")
|
|
593
|
+
width = image.get("width")
|
|
594
|
+
height = image.get("height")
|
|
595
|
+
pixel_width = int(width) if width is not None and width.isdecimal() else None
|
|
596
|
+
pixel_height = int(height) if height is not None and height.isdecimal() else None
|
|
597
|
+
attrs = ImageAttributes(context, pixel_width, pixel_height, alt, title, None)
|
|
499
598
|
|
|
500
599
|
if is_absolute_url(src):
|
|
501
600
|
return self._transform_external_image(src, attrs)
|
|
@@ -504,39 +603,32 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
504
603
|
|
|
505
604
|
absolute_path = self._verify_image_path(path)
|
|
506
605
|
if absolute_path is None:
|
|
507
|
-
return self._create_missing(path, caption)
|
|
606
|
+
return self._create_missing(path, attrs.caption)
|
|
508
607
|
|
|
509
608
|
if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
|
|
510
609
|
return self._transform_drawio_image(absolute_path, attrs)
|
|
511
610
|
elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
|
|
512
611
|
return self._transform_drawio(absolute_path, attrs)
|
|
612
|
+
elif absolute_path.name.endswith(".mmd") or absolute_path.name.endswith(".mermaid"):
|
|
613
|
+
return self._transform_external_mermaid(absolute_path, attrs)
|
|
513
614
|
else:
|
|
514
615
|
return self._transform_attached_image(absolute_path, attrs)
|
|
515
616
|
|
|
516
617
|
def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ET._Element:
|
|
517
618
|
"Emits Confluence Storage Format XHTML for an external image."
|
|
518
619
|
|
|
519
|
-
attributes: dict[str, Any] = {
|
|
520
|
-
ET.QName(namespaces["ac"], "align"): "center",
|
|
521
|
-
ET.QName(namespaces["ac"], "layout"): "center",
|
|
522
|
-
}
|
|
523
|
-
if attrs.width is not None:
|
|
524
|
-
attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
|
|
525
|
-
if attrs.height is not None:
|
|
526
|
-
attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
|
|
527
|
-
|
|
528
620
|
elements: list[ET._Element] = []
|
|
529
621
|
elements.append(
|
|
530
|
-
|
|
622
|
+
RI_ELEM(
|
|
531
623
|
"url",
|
|
532
624
|
# refers to an external image
|
|
533
|
-
{
|
|
625
|
+
{RI_ATTR("value"): url},
|
|
534
626
|
)
|
|
535
627
|
)
|
|
536
|
-
if attrs.caption
|
|
537
|
-
elements.append(
|
|
628
|
+
if attrs.caption:
|
|
629
|
+
elements.append(AC_ELEM("caption", attrs.caption))
|
|
538
630
|
|
|
539
|
-
return
|
|
631
|
+
return AC_ELEM("image", attrs.as_dict(), *elements)
|
|
540
632
|
|
|
541
633
|
def _verify_image_path(self, path: Path) -> Optional[Path]:
|
|
542
634
|
"Checks whether an image path is safe to use."
|
|
@@ -557,13 +649,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
557
649
|
def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
|
|
558
650
|
"Emits Confluence Storage Format XHTML for an attached raster or vector image."
|
|
559
651
|
|
|
560
|
-
if self.options.prefer_raster and absolute_path.
|
|
652
|
+
if self.options.prefer_raster and absolute_path.suffix == ".svg":
|
|
561
653
|
# prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
|
|
562
654
|
png_file = absolute_path.with_suffix(".png")
|
|
563
655
|
if png_file.exists():
|
|
564
656
|
absolute_path = png_file
|
|
565
657
|
|
|
566
|
-
self.images.append(absolute_path)
|
|
658
|
+
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
567
659
|
image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
|
|
568
660
|
return self._create_attached_image(image_name, attrs)
|
|
569
661
|
|
|
@@ -573,15 +665,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
573
665
|
if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
|
|
574
666
|
raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
|
|
575
667
|
|
|
668
|
+
relative_path = path_relative_to(absolute_path, self.base_dir)
|
|
576
669
|
if self.options.render_drawio:
|
|
577
670
|
image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
|
|
578
|
-
|
|
579
|
-
image_filename =
|
|
580
|
-
self.embedded_images[image_filename] = image_data
|
|
671
|
+
image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
|
|
672
|
+
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
581
673
|
return self._create_attached_image(image_filename, attrs)
|
|
582
674
|
else:
|
|
583
|
-
self.images.append(absolute_path)
|
|
584
|
-
image_filename = attachment_name(
|
|
675
|
+
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
676
|
+
image_filename = attachment_name(relative_path)
|
|
585
677
|
return self._create_drawio(image_filename, attrs)
|
|
586
678
|
|
|
587
679
|
def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
|
|
@@ -596,72 +688,63 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
596
688
|
# extract embedded editable diagram and upload as *.drawio
|
|
597
689
|
image_data = drawio.extract_diagram(absolute_path)
|
|
598
690
|
image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
|
|
599
|
-
self.
|
|
691
|
+
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
600
692
|
|
|
601
693
|
return self._create_drawio(image_filename, attrs)
|
|
602
694
|
|
|
603
695
|
def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ET._Element:
|
|
604
696
|
"An image embedded into the page, linking to an attachment."
|
|
605
697
|
|
|
606
|
-
attributes: dict[str, Any] = {
|
|
607
|
-
ET.QName(namespaces["ac"], "align"): "center",
|
|
608
|
-
ET.QName(namespaces["ac"], "layout"): "center",
|
|
609
|
-
}
|
|
610
|
-
if attrs.width is not None:
|
|
611
|
-
attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
|
|
612
|
-
if attrs.height is not None:
|
|
613
|
-
attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
|
|
614
|
-
|
|
615
698
|
elements: list[ET._Element] = []
|
|
616
699
|
elements.append(
|
|
617
|
-
|
|
700
|
+
RI_ELEM(
|
|
618
701
|
"attachment",
|
|
619
702
|
# refers to an attachment uploaded alongside the page
|
|
620
|
-
{
|
|
703
|
+
{RI_ATTR("filename"): image_name},
|
|
621
704
|
)
|
|
622
705
|
)
|
|
623
|
-
if attrs.caption
|
|
624
|
-
elements.append(
|
|
706
|
+
if attrs.caption:
|
|
707
|
+
elements.append(AC_ELEM("caption", attrs.caption))
|
|
625
708
|
|
|
626
|
-
return
|
|
709
|
+
return AC_ELEM("image", attrs.as_dict(), *elements)
|
|
627
710
|
|
|
628
711
|
def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
|
|
629
712
|
"A draw.io diagram embedded into the page, linking to an attachment."
|
|
630
713
|
|
|
631
714
|
parameters: list[ET._Element] = [
|
|
632
|
-
|
|
715
|
+
AC_ELEM(
|
|
633
716
|
"parameter",
|
|
634
|
-
{
|
|
717
|
+
{AC_ATTR("name"): "diagramName"},
|
|
635
718
|
filename,
|
|
636
719
|
),
|
|
637
720
|
]
|
|
638
721
|
if attrs.width is not None:
|
|
639
722
|
parameters.append(
|
|
640
|
-
|
|
723
|
+
AC_ELEM(
|
|
641
724
|
"parameter",
|
|
642
|
-
{
|
|
643
|
-
attrs.width,
|
|
725
|
+
{AC_ATTR("name"): "width"},
|
|
726
|
+
str(attrs.width),
|
|
644
727
|
),
|
|
645
728
|
)
|
|
646
729
|
if attrs.height is not None:
|
|
647
730
|
parameters.append(
|
|
648
|
-
|
|
731
|
+
AC_ELEM(
|
|
649
732
|
"parameter",
|
|
650
|
-
{
|
|
651
|
-
attrs.height,
|
|
733
|
+
{AC_ATTR("name"): "height"},
|
|
734
|
+
str(attrs.height),
|
|
652
735
|
),
|
|
653
736
|
)
|
|
654
737
|
|
|
655
738
|
local_id = str(uuid.uuid4())
|
|
656
739
|
macro_id = str(uuid.uuid4())
|
|
657
|
-
return
|
|
740
|
+
return AC_ELEM(
|
|
658
741
|
"structured-macro",
|
|
659
742
|
{
|
|
660
|
-
|
|
661
|
-
|
|
743
|
+
AC_ATTR("name"): "drawio",
|
|
744
|
+
AC_ATTR("schema-version"): "1",
|
|
662
745
|
"data-layout": "default",
|
|
663
|
-
|
|
664
|
-
|
|
746
|
+
AC_ATTR("local-id"): local_id,
|
|
747
|
+
AC_ATTR("macro-id"): macro_id,
|
|
665
748
|
},
|
|
666
749
|
*parameters,
|
|
667
750
|
)
|
|
@@ -672,21 +755,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
672
755
|
message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
|
|
673
756
|
if caption is not None:
|
|
674
757
|
content = [
|
|
675
|
-
|
|
758
|
+
AC_ELEM(
|
|
676
759
|
"parameter",
|
|
677
|
-
{
|
|
760
|
+
{AC_ATTR("name"): "title"},
|
|
678
761
|
caption,
|
|
679
762
|
),
|
|
680
|
-
|
|
763
|
+
AC_ELEM("rich-text-body", {}, message),
|
|
681
764
|
]
|
|
682
765
|
else:
|
|
683
|
-
content = [
|
|
766
|
+
content = [AC_ELEM("rich-text-body", {}, message)]
|
|
684
767
|
|
|
685
|
-
return
|
|
768
|
+
return AC_ELEM(
|
|
686
769
|
"structured-macro",
|
|
687
770
|
{
|
|
688
|
-
|
|
689
|
-
|
|
771
|
+
AC_ATTR("name"): "warning",
|
|
772
|
+
AC_ATTR("schema-version"): "1",
|
|
690
773
|
},
|
|
691
774
|
*content,
|
|
692
775
|
)
|
|
@@ -694,107 +777,132 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
694
777
|
def _transform_code_block(self, code: ET._Element) -> ET._Element:
|
|
695
778
|
"Transforms a code block."
|
|
696
779
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
if m:
|
|
701
|
-
language = m.group(1)
|
|
780
|
+
if language_class := code.get("class"):
|
|
781
|
+
if m := re.match("^language-(.*)$", language_class):
|
|
782
|
+
language_name = m.group(1)
|
|
702
783
|
else:
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
784
|
+
language_name = None
|
|
785
|
+
else:
|
|
786
|
+
language_name = None
|
|
787
|
+
|
|
788
|
+
# translate name to standard name for (programming) language
|
|
789
|
+
if language_name is not None:
|
|
790
|
+
language_id = _LANGUAGES.get(language_name)
|
|
791
|
+
else:
|
|
792
|
+
language_id = None
|
|
793
|
+
|
|
706
794
|
content: str = code.text or ""
|
|
707
795
|
content = content.rstrip()
|
|
708
796
|
|
|
709
|
-
if
|
|
710
|
-
return self.
|
|
797
|
+
if language_id == "mermaid":
|
|
798
|
+
return self._transform_fenced_mermaid(content)
|
|
711
799
|
|
|
712
|
-
return
|
|
800
|
+
return AC_ELEM(
|
|
713
801
|
"structured-macro",
|
|
714
802
|
{
|
|
715
|
-
|
|
716
|
-
|
|
803
|
+
AC_ATTR("name"): "code",
|
|
804
|
+
AC_ATTR("schema-version"): "1",
|
|
717
805
|
},
|
|
718
|
-
|
|
806
|
+
AC_ELEM(
|
|
719
807
|
"parameter",
|
|
720
|
-
{
|
|
721
|
-
"
|
|
808
|
+
{AC_ATTR("name"): "language"},
|
|
809
|
+
language_id or "none",
|
|
722
810
|
),
|
|
723
|
-
|
|
724
|
-
"parameter",
|
|
725
|
-
{ET.QName(namespaces["ac"], "name"): "language"},
|
|
726
|
-
language,
|
|
727
|
-
),
|
|
728
|
-
AC("plain-text-body", ET.CDATA(content)),
|
|
811
|
+
AC_ELEM("plain-text-body", ET.CDATA(content)),
|
|
729
812
|
)
|
|
730
813
|
|
|
731
|
-
def
|
|
732
|
-
"
|
|
814
|
+
def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
|
|
815
|
+
"Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
|
|
816
|
+
|
|
817
|
+
if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
|
|
818
|
+
raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
|
|
819
|
+
|
|
820
|
+
relative_path = path_relative_to(absolute_path, self.base_dir)
|
|
821
|
+
if self.options.render_mermaid:
|
|
822
|
+
with open(absolute_path, "r", encoding="utf-8") as f:
|
|
823
|
+
content = f.read()
|
|
824
|
+
image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
|
|
825
|
+
image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
|
|
826
|
+
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
827
|
+
return self._create_attached_image(image_filename, attrs)
|
|
828
|
+
else:
|
|
829
|
+
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
830
|
+
mermaid_filename = attachment_name(relative_path)
|
|
831
|
+
return self._create_mermaid_embed(mermaid_filename)
|
|
832
|
+
|
|
833
|
+
def _transform_fenced_mermaid(self, content: str) -> ET._Element:
|
|
834
|
+
"Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a fenced code block."
|
|
733
835
|
|
|
734
836
|
if self.options.render_mermaid:
|
|
735
837
|
image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
|
|
736
838
|
image_hash = hashlib.md5(image_data).hexdigest()
|
|
737
839
|
image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
|
|
738
|
-
self.
|
|
739
|
-
return self._create_attached_image(image_filename, ImageAttributes
|
|
840
|
+
self.embedded_files[image_filename] = EmbeddedFileData(image_data)
|
|
841
|
+
return self._create_attached_image(image_filename, ImageAttributes.EMPTY_BLOCK)
|
|
740
842
|
else:
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
),
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
843
|
+
mermaid_data = content.encode("utf-8")
|
|
844
|
+
mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
|
|
845
|
+
mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
|
|
846
|
+
self.embedded_files[mermaid_filename] = EmbeddedFileData(mermaid_data)
|
|
847
|
+
return self._create_mermaid_embed(mermaid_filename)
|
|
848
|
+
|
|
849
|
+
def _create_mermaid_embed(self, filename: str) -> ET._Element:
|
|
850
|
+
"A Mermaid diagram, linking to an attachment that captures the Mermaid source."
|
|
851
|
+
|
|
852
|
+
local_id = str(uuid.uuid4())
|
|
853
|
+
macro_id = str(uuid.uuid4())
|
|
854
|
+
return AC_ELEM(
|
|
855
|
+
"structured-macro",
|
|
856
|
+
{
|
|
857
|
+
AC_ATTR("name"): "mermaid-cloud",
|
|
858
|
+
AC_ATTR("schema-version"): "1",
|
|
859
|
+
"data-layout": "default",
|
|
860
|
+
AC_ATTR("local-id"): local_id,
|
|
861
|
+
AC_ATTR("macro-id"): macro_id,
|
|
862
|
+
},
|
|
863
|
+
AC_ELEM(
|
|
864
|
+
"parameter",
|
|
865
|
+
{AC_ATTR("name"): "filename"},
|
|
866
|
+
filename,
|
|
867
|
+
),
|
|
868
|
+
AC_ELEM(
|
|
869
|
+
"parameter",
|
|
870
|
+
{AC_ATTR("name"): "toolbar"},
|
|
871
|
+
"bottom",
|
|
872
|
+
),
|
|
873
|
+
AC_ELEM(
|
|
874
|
+
"parameter",
|
|
875
|
+
{AC_ATTR("name"): "zoom"},
|
|
876
|
+
"fit",
|
|
877
|
+
),
|
|
878
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "revision"}, "1"),
|
|
879
|
+
)
|
|
773
880
|
|
|
774
881
|
def _transform_toc(self, code: ET._Element) -> ET._Element:
|
|
775
882
|
"Creates a table of contents, constructed from headings in the document."
|
|
776
883
|
|
|
777
|
-
return
|
|
884
|
+
return AC_ELEM(
|
|
778
885
|
"structured-macro",
|
|
779
886
|
{
|
|
780
|
-
|
|
781
|
-
|
|
887
|
+
AC_ATTR("name"): "toc",
|
|
888
|
+
AC_ATTR("schema-version"): "1",
|
|
889
|
+
"data-layout": "default",
|
|
782
890
|
},
|
|
783
|
-
|
|
784
|
-
|
|
891
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "outline"}, "clear"),
|
|
892
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "style"}, "default"),
|
|
785
893
|
)
|
|
786
894
|
|
|
787
895
|
def _transform_listing(self, code: ET._Element) -> ET._Element:
|
|
788
896
|
"Creates a list of child pages."
|
|
789
897
|
|
|
790
|
-
return
|
|
898
|
+
return AC_ELEM(
|
|
791
899
|
"structured-macro",
|
|
792
900
|
{
|
|
793
|
-
|
|
794
|
-
|
|
901
|
+
AC_ATTR("name"): "children",
|
|
902
|
+
AC_ATTR("schema-version"): "2",
|
|
795
903
|
"data-layout": "default",
|
|
796
904
|
},
|
|
797
|
-
|
|
905
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "allChildren"}, "true"),
|
|
798
906
|
)
|
|
799
907
|
|
|
800
908
|
def _transform_admonition(self, elem: ET._Element) -> ET._Element:
|
|
@@ -805,8 +913,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
805
913
|
syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
|
|
806
914
|
"""
|
|
807
915
|
|
|
916
|
+
if len(elem) < 1:
|
|
917
|
+
raise DocumentError("empty admonition")
|
|
918
|
+
|
|
808
919
|
# <div class="admonition note">
|
|
809
|
-
class_list = elem.
|
|
920
|
+
class_list = elem.get("class", "").split(" ")
|
|
810
921
|
class_name: Optional[str] = None
|
|
811
922
|
if "info" in class_list:
|
|
812
923
|
class_name = "info"
|
|
@@ -824,33 +935,36 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
824
935
|
self.visit(e)
|
|
825
936
|
|
|
826
937
|
# <p class="admonition-title">Note</p>
|
|
827
|
-
if "admonition-title" in elem[0].
|
|
938
|
+
if "admonition-title" in elem[0].get("class", "").split(" "):
|
|
828
939
|
content = [
|
|
829
|
-
|
|
940
|
+
AC_ELEM(
|
|
830
941
|
"parameter",
|
|
831
|
-
{
|
|
942
|
+
{AC_ATTR("name"): "title"},
|
|
832
943
|
elem[0].text or "",
|
|
833
944
|
),
|
|
834
|
-
|
|
945
|
+
AC_ELEM("rich-text-body", {}, *list(elem[1:])),
|
|
835
946
|
]
|
|
836
947
|
else:
|
|
837
|
-
content = [
|
|
948
|
+
content = [AC_ELEM("rich-text-body", {}, *list(elem))]
|
|
838
949
|
|
|
839
|
-
return
|
|
950
|
+
return AC_ELEM(
|
|
840
951
|
"structured-macro",
|
|
841
952
|
{
|
|
842
|
-
|
|
843
|
-
|
|
953
|
+
AC_ATTR("name"): class_name,
|
|
954
|
+
AC_ATTR("schema-version"): "1",
|
|
844
955
|
},
|
|
845
956
|
*content,
|
|
846
957
|
)
|
|
847
958
|
|
|
848
|
-
def _transform_github_alert(self,
|
|
959
|
+
def _transform_github_alert(self, blockquote: ET._Element) -> ET._Element:
|
|
849
960
|
"""
|
|
850
961
|
Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
|
|
851
962
|
"""
|
|
852
963
|
|
|
853
|
-
|
|
964
|
+
if len(blockquote) < 1:
|
|
965
|
+
raise DocumentError("empty GitHub alert")
|
|
966
|
+
|
|
967
|
+
content = blockquote[0]
|
|
854
968
|
if content.text is None:
|
|
855
969
|
raise DocumentError("empty content")
|
|
856
970
|
|
|
@@ -875,9 +989,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
875
989
|
else:
|
|
876
990
|
raise DocumentError(f"unsupported GitHub alert: {alert}")
|
|
877
991
|
|
|
878
|
-
return self._transform_alert(
|
|
992
|
+
return self._transform_alert(blockquote, class_name, skip)
|
|
879
993
|
|
|
880
|
-
def _transform_gitlab_alert(self,
|
|
994
|
+
def _transform_gitlab_alert(self, blockquote: ET._Element) -> ET._Element:
|
|
881
995
|
"""
|
|
882
996
|
Creates a classic GitLab-style panel.
|
|
883
997
|
|
|
@@ -885,7 +999,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
885
999
|
This syntax does not use Hugo shortcode.
|
|
886
1000
|
"""
|
|
887
1001
|
|
|
888
|
-
|
|
1002
|
+
if len(blockquote) < 1:
|
|
1003
|
+
raise DocumentError("empty GitLab alert")
|
|
1004
|
+
|
|
1005
|
+
content = blockquote[0]
|
|
889
1006
|
if content.text is None:
|
|
890
1007
|
raise DocumentError("empty content")
|
|
891
1008
|
|
|
@@ -908,69 +1025,85 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
908
1025
|
else:
|
|
909
1026
|
raise DocumentError(f"unsupported GitLab alert: {alert}")
|
|
910
1027
|
|
|
911
|
-
return self._transform_alert(
|
|
1028
|
+
return self._transform_alert(blockquote, class_name, skip)
|
|
912
1029
|
|
|
913
|
-
def _transform_alert(self,
|
|
1030
|
+
def _transform_alert(self, blockquote: ET._Element, class_name: Optional[str], skip: int) -> ET._Element:
|
|
914
1031
|
"""
|
|
915
1032
|
Creates an info, tip, note or warning panel from a GitHub or GitLab alert.
|
|
916
1033
|
|
|
917
|
-
Transforms
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
1034
|
+
Transforms GitHub alert or GitLab alert syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
|
|
1035
|
+
|
|
1036
|
+
:see: https://docs.github.com/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
|
|
1037
|
+
:see: https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes
|
|
921
1038
|
"""
|
|
922
1039
|
|
|
923
|
-
content =
|
|
1040
|
+
content = blockquote[0]
|
|
924
1041
|
if content.text is None:
|
|
925
1042
|
raise DocumentError("empty content")
|
|
926
1043
|
|
|
927
1044
|
if class_name is None:
|
|
928
1045
|
raise DocumentError("not an alert")
|
|
929
1046
|
|
|
930
|
-
for e in
|
|
1047
|
+
for e in blockquote:
|
|
931
1048
|
self.visit(e)
|
|
932
1049
|
|
|
933
1050
|
content.text = content.text[skip:]
|
|
934
|
-
return
|
|
1051
|
+
return AC_ELEM(
|
|
935
1052
|
"structured-macro",
|
|
936
1053
|
{
|
|
937
|
-
|
|
938
|
-
|
|
1054
|
+
AC_ATTR("name"): class_name,
|
|
1055
|
+
AC_ATTR("schema-version"): "1",
|
|
939
1056
|
},
|
|
940
|
-
|
|
1057
|
+
AC_ELEM("rich-text-body", {}, *list(blockquote)),
|
|
941
1058
|
)
|
|
942
1059
|
|
|
943
|
-
def _transform_section(self,
|
|
1060
|
+
def _transform_section(self, details: ET._Element) -> ET._Element:
|
|
944
1061
|
"""
|
|
945
1062
|
Creates a collapsed section.
|
|
946
1063
|
|
|
947
|
-
Transforms
|
|
948
|
-
|
|
949
|
-
|
|
1064
|
+
Transforms a GitHub collapsed section syntax into the Confluence structured macro *expand*.
|
|
1065
|
+
|
|
1066
|
+
:see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections
|
|
950
1067
|
"""
|
|
951
1068
|
|
|
952
|
-
|
|
1069
|
+
summary = details[0]
|
|
1070
|
+
if summary.tag != "summary":
|
|
953
1071
|
raise DocumentError("expected: `<summary>` as first direct child of `<details>`")
|
|
954
|
-
if
|
|
1072
|
+
if details.text is not None or summary.tail is not None:
|
|
1073
|
+
# when `<details>` has attribute `markdown=1`, content is parsed as Markdown:
|
|
1074
|
+
# ```
|
|
1075
|
+
# <details>
|
|
1076
|
+
# <summary>...</summary>
|
|
1077
|
+
# <p>Text with <em>emphasis</em>.</p>
|
|
1078
|
+
# </details>
|
|
1079
|
+
# ```
|
|
1080
|
+
#
|
|
1081
|
+
# when `<details>` lacks attribute `markdown=1`, content is passed down as raw HTML, partly as `text` of `<detail>` or `tail` of `<summary>`:
|
|
1082
|
+
# ```
|
|
1083
|
+
# <details>
|
|
1084
|
+
# <summary>...</summary>
|
|
1085
|
+
# Text with *emphasis*.
|
|
1086
|
+
# </details>
|
|
955
1087
|
raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
|
|
956
1088
|
|
|
957
|
-
|
|
958
|
-
|
|
1089
|
+
summary_text = element_to_text(summary)
|
|
1090
|
+
details.remove(summary)
|
|
959
1091
|
|
|
960
|
-
|
|
1092
|
+
# transform Markdown to Confluence within collapsed section content
|
|
1093
|
+
self.visit(details)
|
|
961
1094
|
|
|
962
|
-
return
|
|
1095
|
+
return AC_ELEM(
|
|
963
1096
|
"structured-macro",
|
|
964
1097
|
{
|
|
965
|
-
|
|
966
|
-
|
|
1098
|
+
AC_ATTR("name"): "expand",
|
|
1099
|
+
AC_ATTR("schema-version"): "1",
|
|
967
1100
|
},
|
|
968
|
-
|
|
1101
|
+
AC_ELEM(
|
|
969
1102
|
"parameter",
|
|
970
|
-
{
|
|
971
|
-
|
|
1103
|
+
{AC_ATTR("name"): "title"},
|
|
1104
|
+
summary_text,
|
|
972
1105
|
),
|
|
973
|
-
|
|
1106
|
+
AC_ELEM("rich-text-body", {}, *list(details)),
|
|
974
1107
|
)
|
|
975
1108
|
|
|
976
1109
|
def _transform_emoji(self, elem: ET._Element) -> ET._Element:
|
|
@@ -978,23 +1111,59 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
978
1111
|
Inserts an inline emoji character.
|
|
979
1112
|
"""
|
|
980
1113
|
|
|
981
|
-
shortname = elem.
|
|
982
|
-
unicode = elem.
|
|
1114
|
+
shortname = elem.get("data-shortname", "")
|
|
1115
|
+
unicode = elem.get("data-unicode", None)
|
|
983
1116
|
alt = elem.text or ""
|
|
984
1117
|
|
|
985
1118
|
# <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="😉"/>
|
|
986
|
-
|
|
987
|
-
# <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_minus_sign:" ac:emoji-id="2796" ac:emoji-fallback="➖"/>
|
|
988
|
-
return AC(
|
|
1119
|
+
return AC_ELEM(
|
|
989
1120
|
"emoticon",
|
|
990
1121
|
{
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1122
|
+
AC_ATTR("name"): shortname,
|
|
1123
|
+
AC_ATTR("emoji-shortname"): f":{shortname}:",
|
|
1124
|
+
AC_ATTR("emoji-id"): unicode,
|
|
1125
|
+
AC_ATTR("emoji-fallback"): alt,
|
|
995
1126
|
},
|
|
996
1127
|
)
|
|
997
1128
|
|
|
1129
|
+
def _transform_mark(self, mark: ET._Element) -> ET._Element:
|
|
1130
|
+
"""
|
|
1131
|
+
Adds inline highlighting to text.
|
|
1132
|
+
"""
|
|
1133
|
+
|
|
1134
|
+
attrs = dict(mark.items())
|
|
1135
|
+
old_style = attrs.get("style")
|
|
1136
|
+
new_style = "background-color: rgb(254,222,200);"
|
|
1137
|
+
if old_style is not None:
|
|
1138
|
+
new_style += f" {old_style}"
|
|
1139
|
+
attrs["style"] = new_style
|
|
1140
|
+
span = HTML("span", attrs, *list(mark))
|
|
1141
|
+
span.text = mark.text
|
|
1142
|
+
return span
|
|
1143
|
+
|
|
1144
|
+
def _transform_latex(self, elem: ET._Element, context: FormattingContext) -> ET._Element:
|
|
1145
|
+
"""
|
|
1146
|
+
Creates an image rendering of a LaTeX formula with Matplotlib.
|
|
1147
|
+
"""
|
|
1148
|
+
|
|
1149
|
+
content = elem.text
|
|
1150
|
+
if not content:
|
|
1151
|
+
raise DocumentError("empty LaTeX formula")
|
|
1152
|
+
|
|
1153
|
+
image_data = render_latex(content, format=self.options.diagram_output_format)
|
|
1154
|
+
if self.options.diagram_output_format == "png":
|
|
1155
|
+
width, height = get_png_dimensions(data=image_data)
|
|
1156
|
+
image_data = remove_png_chunks(["pHYs"], source_data=image_data)
|
|
1157
|
+
attrs = ImageAttributes(context, width, height, content, None, "")
|
|
1158
|
+
else:
|
|
1159
|
+
attrs = ImageAttributes.empty(context)
|
|
1160
|
+
|
|
1161
|
+
image_hash = hashlib.md5(image_data).hexdigest()
|
|
1162
|
+
image_filename = attachment_name(f"formula_{image_hash}.{self.options.diagram_output_format}")
|
|
1163
|
+
self.embedded_files[image_filename] = EmbeddedFileData(image_data, content)
|
|
1164
|
+
image = self._create_attached_image(image_filename, attrs)
|
|
1165
|
+
return image
|
|
1166
|
+
|
|
998
1167
|
def _transform_inline_math(self, elem: ET._Element) -> ET._Element:
|
|
999
1168
|
"""
|
|
1000
1169
|
Creates an inline LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
|
|
@@ -1002,30 +1171,32 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1002
1171
|
:see: https://help.narva.net/latex-math-for-confluence/
|
|
1003
1172
|
"""
|
|
1004
1173
|
|
|
1005
|
-
content = elem.text
|
|
1174
|
+
content = elem.text
|
|
1006
1175
|
if not content:
|
|
1007
1176
|
raise DocumentError("empty inline LaTeX formula")
|
|
1008
1177
|
|
|
1009
1178
|
LOGGER.debug("Found inline LaTeX formula: %s", content)
|
|
1010
1179
|
|
|
1180
|
+
if self.options.render_latex:
|
|
1181
|
+
return self._transform_latex(elem, FormattingContext.INLINE)
|
|
1182
|
+
|
|
1011
1183
|
local_id = str(uuid.uuid4())
|
|
1012
1184
|
macro_id = str(uuid.uuid4())
|
|
1013
|
-
macro =
|
|
1185
|
+
macro = AC_ELEM(
|
|
1014
1186
|
"structured-macro",
|
|
1015
1187
|
{
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1188
|
+
AC_ATTR("name"): "eazy-math-inline",
|
|
1189
|
+
AC_ATTR("schema-version"): "1",
|
|
1190
|
+
AC_ATTR("local-id"): local_id,
|
|
1191
|
+
AC_ATTR("macro-id"): macro_id,
|
|
1020
1192
|
},
|
|
1021
|
-
|
|
1193
|
+
AC_ELEM(
|
|
1022
1194
|
"parameter",
|
|
1023
|
-
{
|
|
1195
|
+
{AC_ATTR("name"): "body"},
|
|
1024
1196
|
content,
|
|
1025
1197
|
),
|
|
1026
|
-
|
|
1198
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
|
|
1027
1199
|
)
|
|
1028
|
-
macro.tail = elem.tail # chain sibling text node that immediately follows original element
|
|
1029
1200
|
return macro
|
|
1030
1201
|
|
|
1031
1202
|
def _transform_block_math(self, elem: ET._Element) -> ET._Element:
|
|
@@ -1035,30 +1206,33 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1035
1206
|
:see: https://help.narva.net/latex-math-for-confluence/
|
|
1036
1207
|
"""
|
|
1037
1208
|
|
|
1038
|
-
content = elem.text
|
|
1209
|
+
content = elem.text
|
|
1039
1210
|
if not content:
|
|
1040
1211
|
raise DocumentError("empty block-level LaTeX formula")
|
|
1041
1212
|
|
|
1042
1213
|
LOGGER.debug("Found block-level LaTeX formula: %s", content)
|
|
1043
1214
|
|
|
1215
|
+
if self.options.render_latex:
|
|
1216
|
+
return self._transform_latex(elem, FormattingContext.BLOCK)
|
|
1217
|
+
|
|
1044
1218
|
local_id = str(uuid.uuid4())
|
|
1045
1219
|
macro_id = str(uuid.uuid4())
|
|
1046
1220
|
|
|
1047
|
-
return
|
|
1221
|
+
return AC_ELEM(
|
|
1048
1222
|
"structured-macro",
|
|
1049
1223
|
{
|
|
1050
|
-
|
|
1051
|
-
|
|
1224
|
+
AC_ATTR("name"): "easy-math-block",
|
|
1225
|
+
AC_ATTR("schema-version"): "1",
|
|
1052
1226
|
"data-layout": "default",
|
|
1053
|
-
|
|
1054
|
-
|
|
1227
|
+
AC_ATTR("local-id"): local_id,
|
|
1228
|
+
AC_ATTR("macro-id"): macro_id,
|
|
1055
1229
|
},
|
|
1056
|
-
|
|
1230
|
+
AC_ELEM(
|
|
1057
1231
|
"parameter",
|
|
1058
|
-
{
|
|
1232
|
+
{AC_ATTR("name"): "body"},
|
|
1059
1233
|
content,
|
|
1060
1234
|
),
|
|
1061
|
-
|
|
1235
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
|
|
1062
1236
|
)
|
|
1063
1237
|
|
|
1064
1238
|
def _transform_footnote_ref(self, elem: ET._Element) -> None:
|
|
@@ -1078,7 +1252,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1078
1252
|
raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
|
|
1079
1253
|
footnote_ref = ref_id.removeprefix("fnref:")
|
|
1080
1254
|
|
|
1081
|
-
link = elem
|
|
1255
|
+
link = next((elem.iterchildren(tag="a")), None)
|
|
1256
|
+
if link is None:
|
|
1257
|
+
raise DocumentError("expected: `<a>` as the first HTML element in a footnote reference")
|
|
1082
1258
|
def_href = link.attrib.pop("href", "")
|
|
1083
1259
|
if not def_href.startswith("#fn:"):
|
|
1084
1260
|
raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
|
|
@@ -1090,26 +1266,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1090
1266
|
elem.remove(link)
|
|
1091
1267
|
|
|
1092
1268
|
# build new anchor for footnote reference
|
|
1093
|
-
ref_anchor =
|
|
1269
|
+
ref_anchor = AC_ELEM(
|
|
1094
1270
|
"structured-macro",
|
|
1095
1271
|
{
|
|
1096
|
-
|
|
1097
|
-
|
|
1272
|
+
AC_ATTR("name"): "anchor",
|
|
1273
|
+
AC_ATTR("schema-version"): "1",
|
|
1098
1274
|
},
|
|
1099
|
-
|
|
1275
|
+
AC_ELEM(
|
|
1100
1276
|
"parameter",
|
|
1101
|
-
{
|
|
1277
|
+
{AC_ATTR("name"): ""},
|
|
1102
1278
|
f"footnote-ref-{footnote_ref}",
|
|
1103
1279
|
),
|
|
1104
1280
|
)
|
|
1105
1281
|
|
|
1106
1282
|
# build new link to footnote definition at the end of page
|
|
1107
|
-
def_link =
|
|
1283
|
+
def_link = AC_ELEM(
|
|
1108
1284
|
"link",
|
|
1109
1285
|
{
|
|
1110
|
-
|
|
1286
|
+
AC_ATTR("anchor"): f"footnote-def-{footnote_def}",
|
|
1111
1287
|
},
|
|
1112
|
-
|
|
1288
|
+
AC_ELEM("link-body", ET.CDATA(text)),
|
|
1113
1289
|
)
|
|
1114
1290
|
|
|
1115
1291
|
# append children synthesized for Confluence
|
|
@@ -1132,18 +1308,28 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1132
1308
|
```
|
|
1133
1309
|
"""
|
|
1134
1310
|
|
|
1135
|
-
|
|
1311
|
+
ordered_list = next((elem.iterchildren(tag="ol")), None)
|
|
1312
|
+
if ordered_list is None:
|
|
1313
|
+
raise DocumentError("expected: `<ol>` as direct child of footnote definition block")
|
|
1314
|
+
|
|
1315
|
+
for list_item in ordered_list:
|
|
1316
|
+
if list_item.tag != "li":
|
|
1317
|
+
raise DocumentError("expected: `<li>` as children of `<ol>` in footnote definition block")
|
|
1318
|
+
|
|
1136
1319
|
def_id = list_item.attrib.pop("id", "")
|
|
1137
1320
|
if not def_id.startswith("fn:"):
|
|
1138
1321
|
raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
|
|
1139
1322
|
footnote_def = def_id.removeprefix("fn:")
|
|
1140
1323
|
|
|
1141
|
-
paragraph = list_item
|
|
1142
|
-
|
|
1143
|
-
|
|
1324
|
+
paragraph = next((list_item.iterchildren(tag="p")), None)
|
|
1325
|
+
if paragraph is None:
|
|
1326
|
+
raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
|
|
1327
|
+
|
|
1328
|
+
ref_anchor = next((paragraph.iterchildren(tag="a", reversed=True)), None)
|
|
1329
|
+
if ref_anchor is None:
|
|
1144
1330
|
raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
|
|
1145
1331
|
|
|
1146
|
-
ref_href = ref_anchor.
|
|
1332
|
+
ref_href = ref_anchor.get("href", "")
|
|
1147
1333
|
if not ref_href.startswith("#fnref:"):
|
|
1148
1334
|
raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
|
|
1149
1335
|
footnote_ref = ref_href.removeprefix("#fnref:")
|
|
@@ -1152,26 +1338,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1152
1338
|
paragraph.remove(ref_anchor)
|
|
1153
1339
|
|
|
1154
1340
|
# build new anchor for footnote definition
|
|
1155
|
-
def_anchor =
|
|
1341
|
+
def_anchor = AC_ELEM(
|
|
1156
1342
|
"structured-macro",
|
|
1157
1343
|
{
|
|
1158
|
-
|
|
1159
|
-
|
|
1344
|
+
AC_ATTR("name"): "anchor",
|
|
1345
|
+
AC_ATTR("schema-version"): "1",
|
|
1160
1346
|
},
|
|
1161
|
-
|
|
1347
|
+
AC_ELEM(
|
|
1162
1348
|
"parameter",
|
|
1163
|
-
{
|
|
1349
|
+
{AC_ATTR("name"): ""},
|
|
1164
1350
|
f"footnote-def-{footnote_def}",
|
|
1165
1351
|
),
|
|
1166
1352
|
)
|
|
1167
1353
|
|
|
1168
1354
|
# build new link to footnote reference in page body
|
|
1169
|
-
ref_link =
|
|
1355
|
+
ref_link = AC_ELEM(
|
|
1170
1356
|
"link",
|
|
1171
1357
|
{
|
|
1172
|
-
|
|
1358
|
+
AC_ATTR("anchor"): f"footnote-ref-{footnote_ref}",
|
|
1173
1359
|
},
|
|
1174
|
-
|
|
1360
|
+
AC_ELEM("link-body", ET.CDATA("↩")),
|
|
1175
1361
|
)
|
|
1176
1362
|
|
|
1177
1363
|
# append children synthesized for Confluence
|
|
@@ -1180,79 +1366,132 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1180
1366
|
paragraph.text = None
|
|
1181
1367
|
paragraph.append(ref_link)
|
|
1182
1368
|
|
|
1369
|
+
def _transform_tasklist(self, elem: ET._Element) -> ET._Element:
|
|
1370
|
+
"""
|
|
1371
|
+
Transforms a list of tasks into an action widget.
|
|
1372
|
+
|
|
1373
|
+
:see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-tasklists
|
|
1374
|
+
"""
|
|
1375
|
+
|
|
1376
|
+
if elem.tag != "ul":
|
|
1377
|
+
raise DocumentError("expected: `<ul>` as the HTML element for a tasklist")
|
|
1378
|
+
|
|
1379
|
+
for item in elem:
|
|
1380
|
+
if item.tag != "li":
|
|
1381
|
+
raise DocumentError("expected: `<li>` as the HTML element for a task")
|
|
1382
|
+
if not element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]):
|
|
1383
|
+
raise DocumentError("expected: each `<li>` in a task list starting with [ ] or [x]")
|
|
1384
|
+
|
|
1385
|
+
tasks: list[ET._Element] = []
|
|
1386
|
+
for index, item in enumerate(elem, start=1):
|
|
1387
|
+
if item.text is None:
|
|
1388
|
+
raise NotImplementedError("pre-condition check not exhaustive")
|
|
1389
|
+
match = re.match(r"^\[([x X])\]", item.text)
|
|
1390
|
+
if match is None:
|
|
1391
|
+
raise NotImplementedError("pre-condition check not exhaustive")
|
|
1392
|
+
|
|
1393
|
+
status = "incomplete" if match.group(1).isspace() else "complete"
|
|
1394
|
+
item.text = item.text[3:]
|
|
1395
|
+
|
|
1396
|
+
# transform Markdown to Confluence within tasklist content
|
|
1397
|
+
self.visit(item)
|
|
1398
|
+
|
|
1399
|
+
body = AC_ELEM("task-body", *list(item))
|
|
1400
|
+
body.text = item.text
|
|
1401
|
+
tasks.append(
|
|
1402
|
+
AC_ELEM(
|
|
1403
|
+
"task",
|
|
1404
|
+
{},
|
|
1405
|
+
AC_ELEM("task-id", str(index)),
|
|
1406
|
+
AC_ELEM("task-uuid", str(uuid.uuid4())),
|
|
1407
|
+
AC_ELEM("task-status", status),
|
|
1408
|
+
body,
|
|
1409
|
+
),
|
|
1410
|
+
)
|
|
1411
|
+
return AC_ELEM("task-list", {}, *tasks)
|
|
1412
|
+
|
|
1413
|
+
@override
|
|
1183
1414
|
def transform(self, child: ET._Element) -> Optional[ET._Element]:
|
|
1184
1415
|
"""
|
|
1185
1416
|
Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
|
|
1186
1417
|
"""
|
|
1187
1418
|
|
|
1188
|
-
#
|
|
1419
|
+
# replace line breaks with regular space in element text to minimize phantom changes
|
|
1189
1420
|
if child.text:
|
|
1190
|
-
text
|
|
1191
|
-
child.text = text.replace("\n", " ")
|
|
1421
|
+
child.text = child.text.replace("\n", " ")
|
|
1192
1422
|
if child.tail:
|
|
1193
|
-
tail
|
|
1194
|
-
child.tail = tail.replace("\n", " ")
|
|
1423
|
+
child.tail = child.tail.replace("\n", " ")
|
|
1195
1424
|
|
|
1196
1425
|
if not isinstance(child.tag, str):
|
|
1197
1426
|
return None
|
|
1198
1427
|
|
|
1199
|
-
# <
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1428
|
+
# <p>...</p>
|
|
1429
|
+
if child.tag == "p":
|
|
1430
|
+
# <p><img src="..." /></p>
|
|
1431
|
+
if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
|
|
1432
|
+
return self._transform_image(FormattingContext.BLOCK, child[0])
|
|
1433
|
+
|
|
1434
|
+
# <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
|
|
1435
|
+
elif is_placeholder_for(child, "TOC"):
|
|
1436
|
+
return self._transform_toc(child)
|
|
1437
|
+
|
|
1438
|
+
# <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
|
|
1439
|
+
elif is_placeholder_for(child, "LISTING"):
|
|
1440
|
+
return self._transform_listing(child)
|
|
1441
|
+
|
|
1442
|
+
# <div>...</div>
|
|
1443
|
+
elif child.tag == "div":
|
|
1444
|
+
classes = child.get("class", "").split(" ")
|
|
1445
|
+
|
|
1446
|
+
# <div class="arithmatex">...</div>
|
|
1447
|
+
if "arithmatex" in classes:
|
|
1448
|
+
return self._transform_block_math(child)
|
|
1449
|
+
|
|
1450
|
+
# <div><ac:structured-macro ...>...</ac:structured-macro></div>
|
|
1451
|
+
elif "csf" in classes:
|
|
1452
|
+
if len(child) != 1:
|
|
1453
|
+
raise DocumentError("expected: single child in Confluence Storage Format block")
|
|
1454
|
+
|
|
1455
|
+
return child[0]
|
|
1456
|
+
|
|
1457
|
+
# <div class="footnote">
|
|
1458
|
+
# <hr/>
|
|
1459
|
+
# <ol>
|
|
1460
|
+
# <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
|
|
1461
|
+
# </ol>
|
|
1462
|
+
# </div>
|
|
1463
|
+
elif "footnote" in classes:
|
|
1464
|
+
self._transform_footnote_def(child)
|
|
1209
1465
|
return None
|
|
1210
1466
|
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
# <
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
# <p>[!TIP] ...</p>
|
|
1240
|
-
# </blockquote>
|
|
1241
|
-
elif child.tag == "blockquote" and len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
|
|
1242
|
-
return self._transform_github_alert(child)
|
|
1243
|
-
|
|
1244
|
-
# Alerts in GitLab
|
|
1245
|
-
# <blockquote>
|
|
1246
|
-
# <p>DISCLAIMER: ...</p>
|
|
1247
|
-
# </blockquote>
|
|
1248
|
-
elif (
|
|
1249
|
-
child.tag == "blockquote"
|
|
1250
|
-
and len(child) > 0
|
|
1251
|
-
and child[0].tag == "p"
|
|
1252
|
-
and child[0].text is not None
|
|
1253
|
-
and starts_with_any(child[0].text, ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"])
|
|
1254
|
-
):
|
|
1255
|
-
return self._transform_gitlab_alert(child)
|
|
1467
|
+
# <div class="admonition note">
|
|
1468
|
+
# <p class="admonition-title">Note</p>
|
|
1469
|
+
# <p>...</p>
|
|
1470
|
+
# </div>
|
|
1471
|
+
#
|
|
1472
|
+
# --- OR ---
|
|
1473
|
+
#
|
|
1474
|
+
# <div class="admonition note">
|
|
1475
|
+
# <p>...</p>
|
|
1476
|
+
# </div>
|
|
1477
|
+
elif "admonition" in classes:
|
|
1478
|
+
return self._transform_admonition(child)
|
|
1479
|
+
|
|
1480
|
+
# <blockquote>...</blockquote>
|
|
1481
|
+
elif child.tag == "blockquote":
|
|
1482
|
+
# Alerts in GitHub
|
|
1483
|
+
# <blockquote>
|
|
1484
|
+
# <p>[!TIP] ...</p>
|
|
1485
|
+
# </blockquote>
|
|
1486
|
+
if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
|
|
1487
|
+
return self._transform_github_alert(child)
|
|
1488
|
+
|
|
1489
|
+
# Alerts in GitLab
|
|
1490
|
+
# <blockquote>
|
|
1491
|
+
# <p>DISCLAIMER: ...</p>
|
|
1492
|
+
# </blockquote>
|
|
1493
|
+
elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
|
|
1494
|
+
return self._transform_gitlab_alert(child)
|
|
1256
1495
|
|
|
1257
1496
|
# <details markdown="1">
|
|
1258
1497
|
# <summary>...</summary>
|
|
@@ -1261,44 +1500,84 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1261
1500
|
elif child.tag == "details" and len(child) > 1 and child[0].tag == "summary":
|
|
1262
1501
|
return self._transform_section(child)
|
|
1263
1502
|
|
|
1503
|
+
# <ol>...</ol>
|
|
1504
|
+
elif child.tag == "ol":
|
|
1505
|
+
# Confluence adds the attribute `start` for every ordered list
|
|
1506
|
+
child.set("start", "1")
|
|
1507
|
+
return None
|
|
1508
|
+
|
|
1509
|
+
# <ul>
|
|
1510
|
+
# <li>[ ] ...</li>
|
|
1511
|
+
# <li>[x] ...</li>
|
|
1512
|
+
# </ul>
|
|
1513
|
+
elif child.tag == "ul":
|
|
1514
|
+
if len(child) > 0 and element_text_starts_with_any(child[0], ["[ ]", "[x]", "[X]"]):
|
|
1515
|
+
return self._transform_tasklist(child)
|
|
1516
|
+
|
|
1517
|
+
return None
|
|
1518
|
+
|
|
1519
|
+
elif child.tag == "li":
|
|
1520
|
+
normalize_inline(child)
|
|
1521
|
+
return None
|
|
1522
|
+
|
|
1523
|
+
# <pre><code class="language-java"> ... </code></pre>
|
|
1524
|
+
elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
|
|
1525
|
+
return self._transform_code_block(child[0])
|
|
1526
|
+
|
|
1527
|
+
# <table>...</table>
|
|
1528
|
+
elif child.tag == "table":
|
|
1529
|
+
child.set("data-layout", "default")
|
|
1530
|
+
return None
|
|
1531
|
+
|
|
1264
1532
|
# <img src="..." alt="..." />
|
|
1265
1533
|
elif child.tag == "img":
|
|
1266
|
-
return self._transform_image(child)
|
|
1534
|
+
return self._transform_image(FormattingContext.INLINE, child)
|
|
1267
1535
|
|
|
1268
1536
|
# <a href="..."> ... </a>
|
|
1269
1537
|
elif child.tag == "a":
|
|
1270
1538
|
return self._transform_link(child)
|
|
1271
1539
|
|
|
1272
|
-
# <
|
|
1273
|
-
elif child.tag == "
|
|
1274
|
-
return self.
|
|
1275
|
-
|
|
1276
|
-
# <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
|
|
1277
|
-
elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
|
|
1278
|
-
return self._transform_emoji(child)
|
|
1540
|
+
# <mark>...</mark>
|
|
1541
|
+
elif child.tag == "mark":
|
|
1542
|
+
return self._transform_mark(child)
|
|
1279
1543
|
|
|
1280
|
-
# <
|
|
1281
|
-
elif child.tag == "
|
|
1282
|
-
|
|
1544
|
+
# <span>...</span>
|
|
1545
|
+
elif child.tag == "span":
|
|
1546
|
+
classes = child.get("class", "").split(" ")
|
|
1283
1547
|
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1548
|
+
# <span class="arithmatex">...</span>
|
|
1549
|
+
if "arithmatex" in classes:
|
|
1550
|
+
return self._transform_inline_math(child)
|
|
1287
1551
|
|
|
1288
1552
|
# <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
|
|
1289
|
-
elif child.tag == "sup" and child.
|
|
1553
|
+
elif child.tag == "sup" and child.get("id", "").startswith("fnref:"):
|
|
1290
1554
|
self._transform_footnote_ref(child)
|
|
1291
1555
|
return None
|
|
1292
1556
|
|
|
1293
|
-
# <
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
#
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1557
|
+
# <input type="date" value="1984-01-01" />
|
|
1558
|
+
elif child.tag == "input" and child.get("type", "") == "date":
|
|
1559
|
+
return HTML("time", {"datetime": child.get("value", "")})
|
|
1560
|
+
|
|
1561
|
+
# <ins>...</ins>
|
|
1562
|
+
elif child.tag == "ins":
|
|
1563
|
+
# Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
|
|
1564
|
+
child.tag = "u"
|
|
1565
|
+
|
|
1566
|
+
# <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
|
|
1567
|
+
elif child.tag == "x-emoji":
|
|
1568
|
+
return self._transform_emoji(child)
|
|
1569
|
+
|
|
1570
|
+
# <h1>...</h1>
|
|
1571
|
+
# <h2>...</h2> ...
|
|
1572
|
+
m = re.match(r"^h([1-6])$", child.tag, flags=re.IGNORECASE)
|
|
1573
|
+
if m is not None:
|
|
1574
|
+
level = int(m.group(1))
|
|
1575
|
+
title = element_to_text(child)
|
|
1576
|
+
self.toc.add(level, title)
|
|
1577
|
+
|
|
1578
|
+
if self.options.heading_anchors:
|
|
1579
|
+
self._transform_heading(child)
|
|
1580
|
+
return None
|
|
1302
1581
|
|
|
1303
1582
|
return None
|
|
1304
1583
|
|
|
@@ -1312,11 +1591,15 @@ class ConversionError(RuntimeError):
|
|
|
1312
1591
|
|
|
1313
1592
|
|
|
1314
1593
|
class ConfluenceDocument:
|
|
1594
|
+
"Encapsulates an element tree for a Confluence document created by parsing a Markdown document."
|
|
1595
|
+
|
|
1315
1596
|
title: Optional[str]
|
|
1316
1597
|
labels: Optional[list[str]]
|
|
1317
1598
|
properties: Optional[dict[str, JsonType]]
|
|
1599
|
+
|
|
1318
1600
|
links: list[str]
|
|
1319
|
-
images: list[
|
|
1601
|
+
images: list[ImageData]
|
|
1602
|
+
embedded_files: dict[str, EmbeddedFileData]
|
|
1320
1603
|
|
|
1321
1604
|
options: ConfluenceDocumentOptions
|
|
1322
1605
|
root: ET._Element
|
|
@@ -1355,10 +1638,18 @@ class ConfluenceDocument:
|
|
|
1355
1638
|
site_metadata: ConfluenceSiteMetadata,
|
|
1356
1639
|
page_metadata: ConfluencePageCollection,
|
|
1357
1640
|
) -> None:
|
|
1641
|
+
"Converts a single Markdown document to Confluence Storage Format."
|
|
1642
|
+
|
|
1358
1643
|
self.options = options
|
|
1359
1644
|
|
|
1645
|
+
# register auxiliary URL substitutions
|
|
1646
|
+
lines: list[str] = []
|
|
1647
|
+
for data_uri, color in status_images.items():
|
|
1648
|
+
lines.append(f"[STATUS-{color.upper()}]: {data_uri}")
|
|
1649
|
+
lines.append(document.text)
|
|
1650
|
+
|
|
1360
1651
|
# convert to HTML
|
|
1361
|
-
html = markdown_to_html(
|
|
1652
|
+
html = markdown_to_html("\n".join(lines))
|
|
1362
1653
|
|
|
1363
1654
|
# parse Markdown document
|
|
1364
1655
|
if self.options.generated_by is not None:
|
|
@@ -1390,10 +1681,13 @@ class ConfluenceDocument:
|
|
|
1390
1681
|
site_metadata,
|
|
1391
1682
|
page_metadata,
|
|
1392
1683
|
)
|
|
1393
|
-
|
|
1684
|
+
try:
|
|
1685
|
+
converter.visit(self.root)
|
|
1686
|
+
except DocumentError as ex:
|
|
1687
|
+
raise ConversionError(path) from ex
|
|
1394
1688
|
self.links = converter.links
|
|
1395
1689
|
self.images = converter.images
|
|
1396
|
-
self.
|
|
1690
|
+
self.embedded_files = converter.embedded_files
|
|
1397
1691
|
|
|
1398
1692
|
self.title = document.title or converter.toc.get_title()
|
|
1399
1693
|
self.labels = document.tags
|
|
@@ -1433,41 +1727,3 @@ def attachment_name(ref: Union[Path, str]) -> str:
|
|
|
1433
1727
|
|
|
1434
1728
|
parts = [replace_part(p) for p in path.parts]
|
|
1435
1729
|
return Path(*parts).as_posix().replace("/", "_")
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
def elements_to_string(root: ET._Element) -> str:
|
|
1439
|
-
xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
|
|
1440
|
-
m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)
|
|
1441
|
-
if m:
|
|
1442
|
-
return m.group(1)
|
|
1443
|
-
else:
|
|
1444
|
-
raise ValueError("expected: Confluence content")
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
def _content_to_string(dtd_path: Path, content: str) -> str:
|
|
1448
|
-
parser = ET.XMLParser(
|
|
1449
|
-
remove_blank_text=True,
|
|
1450
|
-
remove_comments=True,
|
|
1451
|
-
strip_cdata=False,
|
|
1452
|
-
load_dtd=True,
|
|
1453
|
-
)
|
|
1454
|
-
|
|
1455
|
-
ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
|
|
1456
|
-
|
|
1457
|
-
data = [
|
|
1458
|
-
'<?xml version="1.0"?>',
|
|
1459
|
-
f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
|
|
1460
|
-
]
|
|
1461
|
-
data.append(content)
|
|
1462
|
-
data.append("</root>")
|
|
1463
|
-
|
|
1464
|
-
tree = ET.fromstringlist(data, parser=parser)
|
|
1465
|
-
return ET.tostring(tree, pretty_print=True).decode("utf-8")
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
def content_to_string(content: str) -> str:
|
|
1469
|
-
"Converts a Confluence Storage Format document returned by the API into a readable XML document."
|
|
1470
|
-
|
|
1471
|
-
resource_path = resources.files(__package__).joinpath("entities.dtd")
|
|
1472
|
-
with resources.as_file(resource_path) as dtd_path:
|
|
1473
|
-
return _content_to_string(dtd_path, content)
|