markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/METADATA +95 -53
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/RECORD +29 -27
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/WHEEL +1 -1
- md2conf/__init__.py +1 -1
- md2conf/__main__.py +23 -172
- md2conf/api.py +32 -67
- md2conf/attachment.py +4 -3
- md2conf/clio.py +226 -0
- md2conf/compatibility.py +5 -0
- md2conf/converter.py +235 -143
- md2conf/csf.py +89 -9
- md2conf/drawio/render.py +2 -0
- md2conf/frontmatter.py +18 -6
- md2conf/image.py +7 -5
- md2conf/latex.py +8 -1
- md2conf/markdown.py +68 -1
- md2conf/options.py +93 -24
- md2conf/plantuml/extension.py +1 -1
- md2conf/publisher.py +81 -16
- md2conf/reflection.py +74 -0
- md2conf/scanner.py +9 -5
- md2conf/serializer.py +12 -1
- md2conf/svg.py +5 -2
- md2conf/toc.py +1 -1
- md2conf/xml.py +45 -0
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE +0 -0
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe +0 -0
md2conf/converter.py
CHANGED
|
@@ -7,13 +7,13 @@ Copyright 2022-2026, Levente Hunyadi
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import copy
|
|
10
|
-
import hashlib
|
|
11
10
|
import logging
|
|
12
11
|
import os.path
|
|
13
12
|
import re
|
|
14
13
|
import uuid
|
|
15
14
|
from abc import ABC, abstractmethod
|
|
16
15
|
from dataclasses import dataclass
|
|
16
|
+
from enum import Enum
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
from typing import ClassVar
|
|
19
19
|
from urllib.parse import ParseResult, quote_plus, urlparse
|
|
@@ -32,17 +32,17 @@ from .extension import ExtensionOptions, MarketplaceExtension
|
|
|
32
32
|
from .formatting import FormattingContext, ImageAlignment, ImageAttributes
|
|
33
33
|
from .image import ImageGenerator, ImageGeneratorOptions
|
|
34
34
|
from .latex import render_latex
|
|
35
|
-
from .markdown import markdown_to_html
|
|
35
|
+
from .markdown import markdown_to_html, markdown_with_line_numbers
|
|
36
36
|
from .mermaid.extension import MermaidExtension
|
|
37
37
|
from .metadata import ConfluenceSiteMetadata
|
|
38
38
|
from .options import ConfluencePageID, ConverterOptions, DocumentOptions
|
|
39
39
|
from .plantuml.extension import PlantUMLExtension
|
|
40
|
-
from .png import
|
|
40
|
+
from .png import remove_png_chunks
|
|
41
41
|
from .scanner import ScannedDocument, Scanner
|
|
42
42
|
from .serializer import JsonType
|
|
43
43
|
from .toc import TableOfContentsBuilder
|
|
44
44
|
from .uri import is_absolute_url, to_uuid_urn
|
|
45
|
-
from .xml import element_to_text
|
|
45
|
+
from .xml import element_to_text, remove_element
|
|
46
46
|
|
|
47
47
|
ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
|
|
48
48
|
|
|
@@ -114,14 +114,18 @@ def fix_absolute_path(path: Path, root_path: Path) -> Path:
|
|
|
114
114
|
return root_path / path.relative_to(path.root)
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
_UNSAFE_CHAR_REGEXP = re.compile(r"[^A-Za-z0-9._~()'!*:@,;+?-]+")
|
|
118
|
+
_MULTIPLE_SPACE_REGEXP = re.compile(r"\s\s+")
|
|
119
|
+
|
|
120
|
+
|
|
117
121
|
def encode_title(text: str) -> str:
|
|
118
122
|
"Converts a title string such that it is safe to embed into a Confluence URL."
|
|
119
123
|
|
|
120
124
|
# replace unsafe characters with space
|
|
121
|
-
text =
|
|
125
|
+
text = _UNSAFE_CHAR_REGEXP.sub(" ", text)
|
|
122
126
|
|
|
123
127
|
# replace multiple consecutive spaces with single space
|
|
124
|
-
text =
|
|
128
|
+
text = _MULTIPLE_SPACE_REGEXP.sub(" ", text)
|
|
125
129
|
|
|
126
130
|
# URL-encode
|
|
127
131
|
return quote_plus(text.strip())
|
|
@@ -215,6 +219,13 @@ _LANGUAGES = {
|
|
|
215
219
|
# spellchecker: enable
|
|
216
220
|
|
|
217
221
|
|
|
222
|
+
class ElementAction(Enum):
|
|
223
|
+
"Captures standard actions a node visitor may take with the element."
|
|
224
|
+
|
|
225
|
+
RECURSE = "recurse"
|
|
226
|
+
REMOVE = "remove"
|
|
227
|
+
|
|
228
|
+
|
|
218
229
|
class NodeVisitor(ABC):
|
|
219
230
|
def visit(self, node: ElementType) -> None:
|
|
220
231
|
"Recursively visits all descendants of this node."
|
|
@@ -222,29 +233,44 @@ class NodeVisitor(ABC):
|
|
|
222
233
|
if len(node) < 1:
|
|
223
234
|
return
|
|
224
235
|
|
|
225
|
-
|
|
236
|
+
index = 0
|
|
237
|
+
count = len(node)
|
|
238
|
+
while index < count:
|
|
226
239
|
source = node[index]
|
|
227
240
|
target = self.transform(source)
|
|
228
|
-
if target
|
|
241
|
+
if isinstance(target, ElementAction):
|
|
242
|
+
match target:
|
|
243
|
+
case ElementAction.RECURSE:
|
|
244
|
+
# recurse into the element
|
|
245
|
+
self.visit(source)
|
|
246
|
+
index += 1
|
|
247
|
+
case ElementAction.REMOVE:
|
|
248
|
+
# remove the element from the tree
|
|
249
|
+
remove_element(source)
|
|
250
|
+
count -= 1
|
|
251
|
+
else:
|
|
229
252
|
# chain sibling text node that immediately follows original element
|
|
230
253
|
target.tail = source.tail
|
|
231
254
|
source.tail = None
|
|
232
255
|
|
|
233
256
|
# replace original element with transformed element
|
|
234
257
|
node[index] = target
|
|
235
|
-
|
|
236
|
-
self.visit(source)
|
|
258
|
+
index += 1
|
|
237
259
|
|
|
238
260
|
@abstractmethod
|
|
239
|
-
def transform(self, child: ElementType) -> ElementType |
|
|
261
|
+
def transform(self, child: ElementType) -> ElementType | ElementAction: ...
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
_DISALLOWED_CHAR_REGEXP = re.compile(r"[^\sA-Za-z0-9_\-]")
|
|
265
|
+
_SPACE_COLLAPSE_REGEXP = re.compile(r"\s+")
|
|
240
266
|
|
|
241
267
|
|
|
242
268
|
def title_to_identifier(title: str) -> str:
|
|
243
269
|
"Converts a section heading title to a GitHub-style Markdown same-page anchor."
|
|
244
270
|
|
|
245
271
|
s = title.strip().lower()
|
|
246
|
-
s =
|
|
247
|
-
s =
|
|
272
|
+
s = _DISALLOWED_CHAR_REGEXP.sub("", s)
|
|
273
|
+
s = _SPACE_COLLAPSE_REGEXP.sub("-", s)
|
|
248
274
|
return s
|
|
249
275
|
|
|
250
276
|
|
|
@@ -256,6 +282,12 @@ def element_text_starts_with_any(node: ElementType, prefixes: list[str]) -> bool
|
|
|
256
282
|
return starts_with_any(node.text, prefixes)
|
|
257
283
|
|
|
258
284
|
|
|
285
|
+
def child_count(node: ElementType) -> int:
|
|
286
|
+
"Number of children, excluding special elements."
|
|
287
|
+
|
|
288
|
+
return len(node) - sum(1 for _ in node.iterchildren("line-number"))
|
|
289
|
+
|
|
290
|
+
|
|
259
291
|
def is_placeholder_for(node: ElementType, name: str) -> bool:
|
|
260
292
|
"""
|
|
261
293
|
Identifies a Confluence widget placeholder, e.g. `[[_TOC_]]` or `[[_LISTING_]]`.
|
|
@@ -265,7 +297,7 @@ def is_placeholder_for(node: ElementType, name: str) -> bool:
|
|
|
265
297
|
"""
|
|
266
298
|
|
|
267
299
|
# `[[_TOC_]]` is represented in HTML as <p>[[<em>TOC</em>]]</p>
|
|
268
|
-
if node.text != "[[" or
|
|
300
|
+
if node.text != "[[" or child_count(node) != 1:
|
|
269
301
|
return False
|
|
270
302
|
|
|
271
303
|
child = node[0]
|
|
@@ -275,6 +307,65 @@ def is_placeholder_for(node: ElementType, name: str) -> bool:
|
|
|
275
307
|
return True
|
|
276
308
|
|
|
277
309
|
|
|
310
|
+
class PreprocessingError(RuntimeError):
|
|
311
|
+
"Raised when a preprocessing step has failed."
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
class DocumentError(RuntimeError):
|
|
315
|
+
"Raised when a converted Markdown document has an unexpected element or attribute."
|
|
316
|
+
|
|
317
|
+
element: ElementType
|
|
318
|
+
|
|
319
|
+
def __init__(self, element: ElementType, message: str) -> None:
|
|
320
|
+
super().__init__(message)
|
|
321
|
+
self.element = element
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class ConversionError(RuntimeError):
|
|
325
|
+
"Raised when a Markdown document cannot be converted to Confluence Storage Format."
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def transform_skip_comments_in_html(html: str) -> str:
|
|
329
|
+
"""
|
|
330
|
+
Transforms HTML comments marking skip sections into custom elements.
|
|
331
|
+
|
|
332
|
+
From:
|
|
333
|
+
```
|
|
334
|
+
<!-- confluence-skip-start --> ... <!-- confluence-skip-end -->
|
|
335
|
+
```
|
|
336
|
+
Into:
|
|
337
|
+
```
|
|
338
|
+
<confluence-skip> ... </confluence-skip>
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
This must run BEFORE the HTML (generated from Markdown) is parsed, as the XML parser strips comments (remove_comments=True).
|
|
342
|
+
|
|
343
|
+
:param html: HTML string with skip comment markers.
|
|
344
|
+
:returns: HTML string with comments replaced by custom elements.
|
|
345
|
+
"""
|
|
346
|
+
|
|
347
|
+
start_pattern = re.compile(r"<!--\s*confluence-skip-start\s*-->")
|
|
348
|
+
end_pattern = re.compile(r"<!--\s*confluence-skip-end\s*-->")
|
|
349
|
+
|
|
350
|
+
start_count = sum(1 for _ in start_pattern.finditer(html))
|
|
351
|
+
end_count = sum(1 for _ in end_pattern.finditer(html))
|
|
352
|
+
|
|
353
|
+
if start_count != end_count:
|
|
354
|
+
raise PreprocessingError(f"unmatched confluence-skip markers: found {start_count} start marker(s) and {end_count} end marker(s)")
|
|
355
|
+
|
|
356
|
+
if start_count < 1:
|
|
357
|
+
return html
|
|
358
|
+
|
|
359
|
+
skip_pattern = re.compile(r"<!--\s*confluence-skip-start\s*-->(.*?)<!--\s*confluence-skip-end\s*-->", flags=re.DOTALL)
|
|
360
|
+
html = skip_pattern.sub(r"<confluence-skip>\1</confluence-skip>", html)
|
|
361
|
+
|
|
362
|
+
return html
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
_FOOTNOTE_REF_REGEXP = re.compile(r"^fnref(\d*):(.+)$")
|
|
366
|
+
_TASKLIST_REGEXP = re.compile(r"^\[([x X])\]")
|
|
367
|
+
|
|
368
|
+
|
|
278
369
|
@dataclass
|
|
279
370
|
class ConfluencePanel:
|
|
280
371
|
emoji: str
|
|
@@ -402,14 +493,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
402
493
|
def _anchor_warn_or_raise(self, anchor: ElementType, msg: str) -> None:
|
|
403
494
|
"Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
|
|
404
495
|
|
|
405
|
-
if self.options.
|
|
496
|
+
if self.options.force_valid_url:
|
|
497
|
+
raise DocumentError(anchor, msg)
|
|
498
|
+
else:
|
|
406
499
|
LOGGER.warning(msg)
|
|
407
500
|
if anchor.text:
|
|
408
501
|
anchor.text = "❌ " + anchor.text
|
|
409
502
|
elif len(anchor) > 0:
|
|
410
503
|
anchor.text = "❌ "
|
|
411
|
-
else:
|
|
412
|
-
raise DocumentError(msg)
|
|
413
504
|
|
|
414
505
|
def _transform_link(self, anchor: ElementType) -> ElementType | None:
|
|
415
506
|
"""
|
|
@@ -486,7 +577,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
486
577
|
space_key = link_metadata.space_key or self.site_metadata.space_key
|
|
487
578
|
|
|
488
579
|
if space_key is None:
|
|
489
|
-
raise DocumentError("Confluence space key required for building full web URLs")
|
|
580
|
+
raise DocumentError(anchor, "Confluence space key required for building full web URLs")
|
|
490
581
|
|
|
491
582
|
page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
|
|
492
583
|
|
|
@@ -563,7 +654,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
563
654
|
|
|
564
655
|
src = image.get("src")
|
|
565
656
|
if not src:
|
|
566
|
-
raise DocumentError("image lacks `src` attribute")
|
|
657
|
+
raise DocumentError(image, "image lacks `src` attribute")
|
|
567
658
|
|
|
568
659
|
alt = image.get("alt")
|
|
569
660
|
if alt is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
|
|
@@ -589,7 +680,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
589
680
|
else:
|
|
590
681
|
path = Path(src)
|
|
591
682
|
|
|
592
|
-
absolute_path = self._verify_image_path(path)
|
|
683
|
+
absolute_path = self._verify_image_path(image, path)
|
|
593
684
|
if absolute_path is None:
|
|
594
685
|
return self._create_missing(path, attrs)
|
|
595
686
|
|
|
@@ -615,15 +706,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
615
706
|
|
|
616
707
|
return AC_ELEM("image", attrs.as_dict(max_width=self.options.layout.image.max_width), *elements)
|
|
617
708
|
|
|
618
|
-
def _warn_or_raise(self, msg: str) -> None:
|
|
709
|
+
def _warn_or_raise(self, image: ElementType, msg: str) -> None:
|
|
619
710
|
"Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
|
|
620
711
|
|
|
621
|
-
if self.options.
|
|
622
|
-
|
|
712
|
+
if self.options.force_valid_url:
|
|
713
|
+
raise DocumentError(image, msg)
|
|
623
714
|
else:
|
|
624
|
-
|
|
715
|
+
LOGGER.warning(msg)
|
|
625
716
|
|
|
626
|
-
def _verify_image_path(self, path: Path) -> Path | None:
|
|
717
|
+
def _verify_image_path(self, image: ElementType, path: Path) -> Path | None:
|
|
627
718
|
"Checks whether an image path is safe to use."
|
|
628
719
|
|
|
629
720
|
if path.is_absolute():
|
|
@@ -633,11 +724,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
633
724
|
absolute_path = (self.base_dir / path).resolve()
|
|
634
725
|
|
|
635
726
|
if not absolute_path.exists():
|
|
636
|
-
self._warn_or_raise(f"path to image does not exist: {path}")
|
|
727
|
+
self._warn_or_raise(image, f"path to image does not exist: {path}")
|
|
637
728
|
return None
|
|
638
729
|
|
|
639
730
|
if not is_directory_within(absolute_path, self.root_dir):
|
|
640
|
-
self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
|
|
731
|
+
self._warn_or_raise(image, f"path to image {path} points to outside root path {self.root_dir}")
|
|
641
732
|
return None
|
|
642
733
|
|
|
643
734
|
return absolute_path
|
|
@@ -744,15 +835,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
744
835
|
"""
|
|
745
836
|
|
|
746
837
|
if len(elem) < 1:
|
|
747
|
-
raise DocumentError("empty admonition")
|
|
838
|
+
raise DocumentError(elem, "empty admonition")
|
|
748
839
|
|
|
749
840
|
# <div class="admonition note">
|
|
750
841
|
class_list = elem.get("class", "").split(" ")
|
|
751
842
|
class_list.remove("admonition")
|
|
752
843
|
if len(class_list) > 1:
|
|
753
|
-
raise DocumentError(f"too many admonition types: {class_list}")
|
|
844
|
+
raise DocumentError(elem, f"too many admonition types: {class_list}")
|
|
754
845
|
elif len(class_list) < 1:
|
|
755
|
-
raise DocumentError("missing specific admonition type")
|
|
846
|
+
raise DocumentError(elem, "missing specific admonition type")
|
|
756
847
|
admonition = class_list[0]
|
|
757
848
|
|
|
758
849
|
for e in elem:
|
|
@@ -761,11 +852,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
761
852
|
# <p class="admonition-title">Note</p>
|
|
762
853
|
if "admonition-title" in elem[0].get("class", "").split(" "):
|
|
763
854
|
content = [HTML.p(HTML.strong(elem[0].text or "")), *list(elem[1:])]
|
|
764
|
-
|
|
765
|
-
content
|
|
855
|
+
elem.clear(keep_tail=True)
|
|
856
|
+
elem.extend(content)
|
|
766
857
|
|
|
767
858
|
if self.options.use_panel:
|
|
768
|
-
return self._transform_panel(
|
|
859
|
+
return self._transform_panel(elem, admonition)
|
|
769
860
|
else:
|
|
770
861
|
admonition_to_csf = {
|
|
771
862
|
"attention": "note",
|
|
@@ -781,7 +872,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
781
872
|
}
|
|
782
873
|
class_name = admonition_to_csf.get(admonition)
|
|
783
874
|
if class_name is None:
|
|
784
|
-
raise DocumentError(f"unsupported admonition type: {admonition}")
|
|
875
|
+
raise DocumentError(elem, f"unsupported admonition type: {admonition}")
|
|
785
876
|
|
|
786
877
|
return AC_ELEM(
|
|
787
878
|
"structured-macro",
|
|
@@ -789,7 +880,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
789
880
|
AC_ATTR("name"): class_name,
|
|
790
881
|
AC_ATTR("schema-version"): "1",
|
|
791
882
|
},
|
|
792
|
-
AC_ELEM("rich-text-body", {}, *
|
|
883
|
+
AC_ELEM("rich-text-body", {}, *list(elem)),
|
|
793
884
|
)
|
|
794
885
|
|
|
795
886
|
def _transform_github_alert(self, blockquote: ElementType) -> ElementType:
|
|
@@ -797,32 +888,32 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
797
888
|
Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
|
|
798
889
|
"""
|
|
799
890
|
|
|
891
|
+
for e in blockquote:
|
|
892
|
+
self.visit(e)
|
|
893
|
+
|
|
800
894
|
if len(blockquote) < 1:
|
|
801
|
-
raise DocumentError("empty GitHub alert")
|
|
895
|
+
raise DocumentError(blockquote, "empty GitHub alert")
|
|
802
896
|
|
|
803
897
|
content = blockquote[0]
|
|
804
898
|
if content.text is None:
|
|
805
|
-
raise DocumentError("empty content for GitHub alert")
|
|
899
|
+
raise DocumentError(blockquote, "empty content for GitHub alert")
|
|
806
900
|
|
|
807
901
|
pattern = re.compile(r"^\[!([A-Z]+)\]\s*")
|
|
808
902
|
match = pattern.match(content.text)
|
|
809
903
|
if not match:
|
|
810
|
-
raise DocumentError("not a GitHub alert")
|
|
904
|
+
raise DocumentError(blockquote, "not a GitHub alert")
|
|
905
|
+
alert = match.group(1)
|
|
811
906
|
|
|
812
907
|
# remove alert indicator prefix
|
|
813
908
|
content.text = content.text[len(match.group(0)) :]
|
|
814
909
|
|
|
815
|
-
for e in blockquote:
|
|
816
|
-
self.visit(e)
|
|
817
|
-
|
|
818
|
-
alert = match.group(1)
|
|
819
910
|
if self.options.use_panel:
|
|
820
|
-
return self._transform_panel(
|
|
911
|
+
return self._transform_panel(blockquote, alert.lower())
|
|
821
912
|
else:
|
|
822
913
|
alert_to_csf = {"NOTE": "info", "TIP": "tip", "IMPORTANT": "note", "WARNING": "note", "CAUTION": "warning"}
|
|
823
914
|
class_name = alert_to_csf.get(alert)
|
|
824
915
|
if class_name is None:
|
|
825
|
-
raise DocumentError(f"unsupported GitHub alert: {alert}")
|
|
916
|
+
raise DocumentError(blockquote, f"unsupported GitHub alert: {alert}")
|
|
826
917
|
|
|
827
918
|
return self._transform_alert(blockquote, class_name)
|
|
828
919
|
|
|
@@ -834,32 +925,32 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
834
925
|
This syntax does not use Hugo shortcode.
|
|
835
926
|
"""
|
|
836
927
|
|
|
928
|
+
for e in blockquote:
|
|
929
|
+
self.visit(e)
|
|
930
|
+
|
|
837
931
|
if len(blockquote) < 1:
|
|
838
|
-
raise DocumentError("empty GitLab alert")
|
|
932
|
+
raise DocumentError(blockquote, "empty GitLab alert")
|
|
839
933
|
|
|
840
934
|
content = blockquote[0]
|
|
841
935
|
if content.text is None:
|
|
842
|
-
raise DocumentError("empty content for GitLab alert")
|
|
936
|
+
raise DocumentError(blockquote, "empty content for GitLab alert")
|
|
843
937
|
|
|
844
938
|
pattern = re.compile(r"^(FLAG|NOTE|WARNING|DISCLAIMER):\s*")
|
|
845
939
|
match = pattern.match(content.text)
|
|
846
940
|
if not match:
|
|
847
|
-
raise DocumentError("not a GitLab alert")
|
|
941
|
+
raise DocumentError(blockquote, "not a GitLab alert")
|
|
942
|
+
alert = match.group(1)
|
|
848
943
|
|
|
849
944
|
# remove alert indicator prefix
|
|
850
945
|
content.text = content.text[len(match.group(0)) :]
|
|
851
946
|
|
|
852
|
-
for e in blockquote:
|
|
853
|
-
self.visit(e)
|
|
854
|
-
|
|
855
|
-
alert = match.group(1)
|
|
856
947
|
if self.options.use_panel:
|
|
857
|
-
return self._transform_panel(
|
|
948
|
+
return self._transform_panel(blockquote, alert.lower())
|
|
858
949
|
else:
|
|
859
950
|
alert_to_csf = {"FLAG": "note", "NOTE": "info", "WARNING": "note", "DISCLAIMER": "info"}
|
|
860
951
|
class_name = alert_to_csf.get(alert)
|
|
861
952
|
if class_name is None:
|
|
862
|
-
raise DocumentError(f"unsupported GitLab alert: {alert}")
|
|
953
|
+
raise DocumentError(blockquote, f"unsupported GitLab alert: {alert}")
|
|
863
954
|
|
|
864
955
|
return self._transform_alert(blockquote, class_name)
|
|
865
956
|
|
|
@@ -903,12 +994,12 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
903
994
|
AC_ELEM("rich-text-body", {}, *list(blockquote)),
|
|
904
995
|
)
|
|
905
996
|
|
|
906
|
-
def _transform_panel(self,
|
|
997
|
+
def _transform_panel(self, elem: ElementType, class_name: str) -> ElementType:
|
|
907
998
|
"Transforms a blockquote into a themed panel."
|
|
908
999
|
|
|
909
1000
|
panel = ConfluencePanel.from_class.get(class_name)
|
|
910
1001
|
if panel is None:
|
|
911
|
-
raise DocumentError(f"unsupported panel class: {class_name}")
|
|
1002
|
+
raise DocumentError(elem, f"unsupported panel class: {class_name}")
|
|
912
1003
|
|
|
913
1004
|
macro_id = str(uuid.uuid4())
|
|
914
1005
|
return AC_ELEM(
|
|
@@ -922,7 +1013,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
922
1013
|
AC_ELEM("parameter", {AC_ATTR("name"): "panelIconId"}, panel.emoji_unicode),
|
|
923
1014
|
AC_ELEM("parameter", {AC_ATTR("name"): "panelIconText"}, panel.emoji),
|
|
924
1015
|
AC_ELEM("parameter", {AC_ATTR("name"): "bgColor"}, panel.background_color),
|
|
925
|
-
AC_ELEM("rich-text-body", {}, *
|
|
1016
|
+
AC_ELEM("rich-text-body", {}, *list(elem)),
|
|
926
1017
|
)
|
|
927
1018
|
|
|
928
1019
|
def _transform_collapsed(self, details: ElementType) -> ElementType:
|
|
@@ -936,7 +1027,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
936
1027
|
|
|
937
1028
|
summary = details[0]
|
|
938
1029
|
if summary.tag != "summary":
|
|
939
|
-
raise DocumentError("expected: `<summary>` as first direct child of `<details>`")
|
|
1030
|
+
raise DocumentError(details, "expected: `<summary>` as first direct child of `<details>`")
|
|
940
1031
|
if details.text is not None or summary.tail is not None:
|
|
941
1032
|
# when `<details>` has attribute `markdown=1`, content is parsed as Markdown:
|
|
942
1033
|
# ```
|
|
@@ -952,7 +1043,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
952
1043
|
# <summary>...</summary>
|
|
953
1044
|
# Text with *emphasis*.
|
|
954
1045
|
# </details>
|
|
955
|
-
raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
|
|
1046
|
+
raise DocumentError(details, 'expected: attribute `markdown="1"` on `<details>`')
|
|
956
1047
|
|
|
957
1048
|
summary_text = element_to_text(summary)
|
|
958
1049
|
details.remove(summary)
|
|
@@ -1020,29 +1111,22 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1020
1111
|
|
|
1021
1112
|
content = elem.text
|
|
1022
1113
|
if not content:
|
|
1023
|
-
raise DocumentError("empty LaTeX formula")
|
|
1114
|
+
raise DocumentError(elem, "empty LaTeX formula")
|
|
1024
1115
|
|
|
1025
1116
|
image_data = render_latex(content, format=self.options.diagram_output_format)
|
|
1026
1117
|
if self.options.diagram_output_format == "png":
|
|
1027
|
-
width, height = extract_png_dimensions(data=image_data)
|
|
1028
1118
|
image_data = remove_png_chunks(["pHYs"], source_data=image_data)
|
|
1029
|
-
attrs = ImageAttributes(
|
|
1030
|
-
context,
|
|
1031
|
-
width=width,
|
|
1032
|
-
height=height,
|
|
1033
|
-
alt=content,
|
|
1034
|
-
title=None,
|
|
1035
|
-
caption="",
|
|
1036
|
-
alignment=ImageAlignment(self.options.layout.get_image_alignment()),
|
|
1037
|
-
)
|
|
1038
|
-
else:
|
|
1039
|
-
attrs = ImageAttributes.empty(context)
|
|
1040
1119
|
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1120
|
+
attrs = ImageAttributes(
|
|
1121
|
+
context,
|
|
1122
|
+
width=None,
|
|
1123
|
+
height=None,
|
|
1124
|
+
alt=content,
|
|
1125
|
+
title=None,
|
|
1126
|
+
caption="",
|
|
1127
|
+
alignment=ImageAlignment(self.options.layout.get_image_alignment()),
|
|
1128
|
+
)
|
|
1129
|
+
return self.image_generator.transform_attached_data(image_data, attrs, image_type="formula")
|
|
1046
1130
|
|
|
1047
1131
|
def _transform_inline_math(self, elem: ElementType) -> ElementType:
|
|
1048
1132
|
"""
|
|
@@ -1053,7 +1137,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1053
1137
|
|
|
1054
1138
|
content = elem.text
|
|
1055
1139
|
if not content:
|
|
1056
|
-
raise DocumentError("empty inline LaTeX formula")
|
|
1140
|
+
raise DocumentError(elem, "empty inline LaTeX formula")
|
|
1057
1141
|
|
|
1058
1142
|
LOGGER.debug("Found inline LaTeX formula: %s", content)
|
|
1059
1143
|
|
|
@@ -1088,7 +1172,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1088
1172
|
|
|
1089
1173
|
content = elem.text
|
|
1090
1174
|
if not content:
|
|
1091
|
-
raise DocumentError("empty block-level LaTeX formula")
|
|
1175
|
+
raise DocumentError(elem, "empty block-level LaTeX formula")
|
|
1092
1176
|
|
|
1093
1177
|
LOGGER.debug("Found block-level LaTeX formula: %s", content)
|
|
1094
1178
|
|
|
@@ -1133,13 +1217,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1133
1217
|
"""
|
|
1134
1218
|
|
|
1135
1219
|
if elem.tag != "sup":
|
|
1136
|
-
raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
|
|
1220
|
+
raise DocumentError(elem, "expected: `<sup>` as the HTML element for a footnote reference")
|
|
1137
1221
|
|
|
1138
1222
|
ref_id = elem.attrib.pop("id", "")
|
|
1139
1223
|
# Match fnref:NAME, fnref2:NAME, fnref3:NAME, etc.
|
|
1140
|
-
match =
|
|
1224
|
+
match = _FOOTNOTE_REF_REGEXP.match(ref_id)
|
|
1141
1225
|
if match is None:
|
|
1142
|
-
raise DocumentError("expected: attribute `id` of format `fnref:NAME` or `fnrefN:NAME` applied on `<sup>` for a footnote reference")
|
|
1226
|
+
raise DocumentError(elem, "expected: attribute `id` of format `fnref:NAME` or `fnrefN:NAME` applied on `<sup>` for a footnote reference")
|
|
1143
1227
|
numeric_suffix = match.group(1)
|
|
1144
1228
|
footnote_name = match.group(2)
|
|
1145
1229
|
# Build anchor name: first reference uses NAME, subsequent references use NAME-N
|
|
@@ -1147,10 +1231,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1147
1231
|
|
|
1148
1232
|
link = next((elem.iterchildren(tag="a")), None)
|
|
1149
1233
|
if link is None:
|
|
1150
|
-
raise DocumentError("expected: `<a>` as the first HTML element in a footnote reference")
|
|
1234
|
+
raise DocumentError(elem, "expected: `<a>` as the first HTML element in a footnote reference")
|
|
1151
1235
|
def_href = link.attrib.pop("href", "")
|
|
1152
1236
|
if not def_href.startswith("#fn:"):
|
|
1153
|
-
raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
|
|
1237
|
+
raise DocumentError(elem, "expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
|
|
1154
1238
|
footnote_def = def_href.removeprefix("#fn:")
|
|
1155
1239
|
|
|
1156
1240
|
text = link.text or ""
|
|
@@ -1217,21 +1301,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1217
1301
|
|
|
1218
1302
|
ordered_list = next((elem.iterchildren(tag="ol")), None)
|
|
1219
1303
|
if ordered_list is None:
|
|
1220
|
-
raise DocumentError("expected: `<ol>` as direct child of footnote definition block")
|
|
1304
|
+
raise DocumentError(elem, "expected: `<ol>` as direct child of footnote definition block")
|
|
1221
1305
|
|
|
1222
1306
|
for list_item in ordered_list:
|
|
1223
1307
|
if list_item.tag != "li":
|
|
1224
|
-
raise DocumentError("expected: `<li>` as children of `<ol>` in footnote definition block")
|
|
1308
|
+
raise DocumentError(elem, "expected: `<li>` as children of `<ol>` in footnote definition block")
|
|
1225
1309
|
|
|
1226
1310
|
def_id = list_item.attrib.pop("id", "")
|
|
1227
1311
|
if not def_id.startswith("fn:"):
|
|
1228
|
-
raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
|
|
1312
|
+
raise DocumentError(elem, "expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
|
|
1229
1313
|
footnote_def = def_id.removeprefix("fn:")
|
|
1230
1314
|
|
|
1231
1315
|
# find the last paragraph, which is where the backref links are placed
|
|
1232
1316
|
paragraphs = list(list_item.iterchildren(tag="p"))
|
|
1233
1317
|
if not paragraphs:
|
|
1234
|
-
raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
|
|
1318
|
+
raise DocumentError(elem, "expected: `<p>` as a child of `<li>` in a footnote definition")
|
|
1235
1319
|
last_paragraph = paragraphs[-1]
|
|
1236
1320
|
|
|
1237
1321
|
# collect all backref anchors (there may be multiple when a footnote is referenced multiple times)
|
|
@@ -1240,13 +1324,12 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1240
1324
|
backref_info: list[tuple[ElementType, int | None, str]] = []
|
|
1241
1325
|
for anchor in list(last_paragraph.iterchildren(tag="a")):
|
|
1242
1326
|
href = anchor.get("href", "")
|
|
1243
|
-
match
|
|
1244
|
-
if match is not None:
|
|
1327
|
+
if href.startswith("#") and (match := _FOOTNOTE_REF_REGEXP.match(href[1:])) is not None:
|
|
1245
1328
|
backref_info.append((anchor, int(match.group(1), base=10) if match.group(1) else None, match.group(2)))
|
|
1246
1329
|
|
|
1247
1330
|
if not backref_info:
|
|
1248
1331
|
raise DocumentError(
|
|
1249
|
-
"expected: at least one `<a>` element with `href` attribute of format `#fnref:NAME` or `#fnrefN:NAME` in a footnote definition"
|
|
1332
|
+
elem, "expected: at least one `<a>` element with `href` attribute of format `#fnref:NAME` or `#fnrefN:NAME` in a footnote definition"
|
|
1250
1333
|
)
|
|
1251
1334
|
|
|
1252
1335
|
# remove all back-links generated by Python-Markdown
|
|
@@ -1313,19 +1396,19 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1313
1396
|
"""
|
|
1314
1397
|
|
|
1315
1398
|
if elem.tag != "ul":
|
|
1316
|
-
raise DocumentError("expected: `<ul>` as the HTML element for a tasklist")
|
|
1399
|
+
raise DocumentError(elem, "expected: `<ul>` as the HTML element for a tasklist")
|
|
1317
1400
|
|
|
1318
1401
|
for item in elem:
|
|
1319
1402
|
if item.tag != "li":
|
|
1320
|
-
raise DocumentError("expected: `<li>` as the HTML element for a task")
|
|
1321
|
-
if not
|
|
1322
|
-
raise DocumentError("expected: each `<li>` in a task list starting with [ ] or [x]")
|
|
1403
|
+
raise DocumentError(elem, "expected: `<li>` as the HTML element for a task")
|
|
1404
|
+
if not _TASKLIST_REGEXP.match(item.text or ""):
|
|
1405
|
+
raise DocumentError(elem, "expected: each `<li>` in a task list starting with [ ] or [x]")
|
|
1323
1406
|
|
|
1324
1407
|
tasks: list[ElementType] = []
|
|
1325
1408
|
for index, item in enumerate(elem, start=1):
|
|
1326
1409
|
if item.text is None:
|
|
1327
1410
|
raise NotImplementedError("pre-condition check for tasklist not exhaustive")
|
|
1328
|
-
match =
|
|
1411
|
+
match = _TASKLIST_REGEXP.match(item.text)
|
|
1329
1412
|
if match is None:
|
|
1330
1413
|
raise NotImplementedError("pre-condition check for tasklist not exhaustive")
|
|
1331
1414
|
|
|
@@ -1350,7 +1433,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1350
1433
|
return AC_ELEM("task-list", {}, *tasks)
|
|
1351
1434
|
|
|
1352
1435
|
@override
|
|
1353
|
-
def transform(self, child: ElementType) -> ElementType |
|
|
1436
|
+
def transform(self, child: ElementType) -> ElementType | ElementAction:
|
|
1354
1437
|
"""
|
|
1355
1438
|
Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
|
|
1356
1439
|
"""
|
|
@@ -1362,13 +1445,17 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1362
1445
|
child.tail = child.tail.replace("\n", " ")
|
|
1363
1446
|
|
|
1364
1447
|
if not isinstance(child.tag, str):
|
|
1365
|
-
return
|
|
1448
|
+
return ElementAction.RECURSE
|
|
1366
1449
|
|
|
1367
1450
|
match child.tag:
|
|
1451
|
+
# <line-number value="#" />
|
|
1452
|
+
case "line-number":
|
|
1453
|
+
return ElementAction.REMOVE
|
|
1454
|
+
|
|
1368
1455
|
# <p>...</p>
|
|
1369
1456
|
case "p":
|
|
1370
1457
|
# <p><img src="..." /></p>
|
|
1371
|
-
if
|
|
1458
|
+
if child_count(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
|
|
1372
1459
|
return self._transform_image(FormattingContext.BLOCK, child[0])
|
|
1373
1460
|
|
|
1374
1461
|
# <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
|
|
@@ -1390,7 +1477,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1390
1477
|
# <div><ac:structured-macro ...>...</ac:structured-macro></div>
|
|
1391
1478
|
elif "csf" in classes:
|
|
1392
1479
|
if len(child) != 1:
|
|
1393
|
-
raise DocumentError("expected: single child in Confluence Storage Format block")
|
|
1480
|
+
raise DocumentError(child, "expected: single child in Confluence Storage Format block")
|
|
1394
1481
|
|
|
1395
1482
|
return child[0]
|
|
1396
1483
|
|
|
@@ -1402,7 +1489,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1402
1489
|
# </div>
|
|
1403
1490
|
elif "footnote" in classes:
|
|
1404
1491
|
self._transform_footnote_def(child)
|
|
1405
|
-
return
|
|
1492
|
+
return ElementAction.RECURSE
|
|
1406
1493
|
|
|
1407
1494
|
# <div class="admonition note">
|
|
1408
1495
|
# <p class="admonition-title">Note</p>
|
|
@@ -1444,7 +1531,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1444
1531
|
case "ol":
|
|
1445
1532
|
# Confluence adds the attribute `start` for every ordered list
|
|
1446
1533
|
child.set("start", "1")
|
|
1447
|
-
return
|
|
1534
|
+
return ElementAction.RECURSE
|
|
1448
1535
|
|
|
1449
1536
|
# <ul>
|
|
1450
1537
|
# <li>[ ] ...</li>
|
|
@@ -1454,11 +1541,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1454
1541
|
if len(child) > 0 and all(element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]) for item in child):
|
|
1455
1542
|
return self._transform_tasklist(child)
|
|
1456
1543
|
|
|
1457
|
-
return
|
|
1544
|
+
return ElementAction.RECURSE
|
|
1458
1545
|
|
|
1459
1546
|
case "li":
|
|
1460
1547
|
normalize_inline(child)
|
|
1461
|
-
return
|
|
1548
|
+
return ElementAction.RECURSE
|
|
1462
1549
|
|
|
1463
1550
|
# <pre><code class="language-java"> ... </code></pre>
|
|
1464
1551
|
case "pre" if len(child) == 1 and child[0].tag == "code":
|
|
@@ -1479,7 +1566,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1479
1566
|
if self.options.layout.table.width:
|
|
1480
1567
|
child.set("data-table-width", str(self.options.layout.table.width))
|
|
1481
1568
|
|
|
1482
|
-
return
|
|
1569
|
+
return ElementAction.RECURSE
|
|
1483
1570
|
|
|
1484
1571
|
# <img src="..." alt="..." />
|
|
1485
1572
|
case "img":
|
|
@@ -1487,7 +1574,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1487
1574
|
|
|
1488
1575
|
# <a href="..."> ... </a>
|
|
1489
1576
|
case "a":
|
|
1490
|
-
|
|
1577
|
+
link = self._transform_link(child)
|
|
1578
|
+
if link is not None:
|
|
1579
|
+
return link
|
|
1580
|
+
else:
|
|
1581
|
+
return ElementAction.RECURSE
|
|
1491
1582
|
|
|
1492
1583
|
# <mark>...</mark>
|
|
1493
1584
|
case "mark":
|
|
@@ -1503,9 +1594,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1503
1594
|
|
|
1504
1595
|
# <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
|
|
1505
1596
|
# Multiple references: <sup id="fnref2:NAME">...</sup>, <sup id="fnref3:NAME">...</sup>
|
|
1506
|
-
case "sup" if
|
|
1597
|
+
case "sup" if _FOOTNOTE_REF_REGEXP.match(child.get("id", "")):
|
|
1507
1598
|
self._transform_footnote_ref(child)
|
|
1508
|
-
return
|
|
1599
|
+
return ElementAction.RECURSE
|
|
1509
1600
|
|
|
1510
1601
|
# <input type="date" value="1984-01-01" />
|
|
1511
1602
|
case "input" if child.get("type", "") == "date":
|
|
@@ -1516,6 +1607,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1516
1607
|
# Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
|
|
1517
1608
|
child.tag = "u"
|
|
1518
1609
|
|
|
1610
|
+
# <confluence-skip>...</confluence-skip>
|
|
1611
|
+
case "confluence-skip":
|
|
1612
|
+
# Content marked for exclusion from Confluence
|
|
1613
|
+
return ElementAction.REMOVE
|
|
1614
|
+
|
|
1519
1615
|
# <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
|
|
1520
1616
|
case "x-emoji":
|
|
1521
1617
|
return self._transform_emoji(child)
|
|
@@ -1529,19 +1625,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1529
1625
|
|
|
1530
1626
|
if self.options.heading_anchors:
|
|
1531
1627
|
self._transform_heading(child)
|
|
1532
|
-
return
|
|
1628
|
+
return ElementAction.RECURSE
|
|
1533
1629
|
case _:
|
|
1534
1630
|
pass
|
|
1535
1631
|
|
|
1536
|
-
return
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
class DocumentError(RuntimeError):
|
|
1540
|
-
"Raised when a converted Markdown document has an unexpected element or attribute."
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
class ConversionError(RuntimeError):
|
|
1544
|
-
"Raised when a Markdown document cannot be converted to Confluence Storage Format."
|
|
1632
|
+
return ElementAction.RECURSE
|
|
1545
1633
|
|
|
1546
1634
|
|
|
1547
1635
|
class ConfluenceDocument:
|
|
@@ -1602,11 +1690,21 @@ class ConfluenceDocument:
|
|
|
1602
1690
|
lines: list[str] = []
|
|
1603
1691
|
for data_uri, color in status_images.items():
|
|
1604
1692
|
lines.append(f"[STATUS-{color.upper()}]: {data_uri}")
|
|
1605
|
-
|
|
1693
|
+
|
|
1694
|
+
if options.line_numbers:
|
|
1695
|
+
lines.extend(markdown_with_line_numbers(document.text.splitlines(), document.start_line_number))
|
|
1696
|
+
else:
|
|
1697
|
+
lines.append(document.text)
|
|
1606
1698
|
|
|
1607
1699
|
# parse Markdown document and convert to HTML
|
|
1608
1700
|
html = markdown_to_html("\n".join(lines))
|
|
1609
1701
|
|
|
1702
|
+
try:
|
|
1703
|
+
# Transform skip markers in HTML string before parsing
|
|
1704
|
+
html = transform_skip_comments_in_html(html)
|
|
1705
|
+
except PreprocessingError as ex:
|
|
1706
|
+
raise ConversionError(f"failed to convert Markdown file: {path}") from ex
|
|
1707
|
+
|
|
1610
1708
|
# modify HTML as necessary
|
|
1611
1709
|
if self.options.generated_by is not None:
|
|
1612
1710
|
generated_by = props.generated_by or self.options.generated_by
|
|
@@ -1641,6 +1739,21 @@ class ConfluenceDocument:
|
|
|
1641
1739
|
# execute HTML-to-Confluence converter
|
|
1642
1740
|
try:
|
|
1643
1741
|
converter.visit(self.root)
|
|
1742
|
+
except DocumentError as ex:
|
|
1743
|
+
if options.line_numbers:
|
|
1744
|
+
# find closest paragraph ancestor
|
|
1745
|
+
elem = ex.element
|
|
1746
|
+
while elem.tag != "p" and (parent := elem.getparent()):
|
|
1747
|
+
elem = parent
|
|
1748
|
+
|
|
1749
|
+
# locate line number marker element
|
|
1750
|
+
line_number = 0
|
|
1751
|
+
for placeholder in elem.iterchildren("line-number"):
|
|
1752
|
+
line_number = int(placeholder.attrib["value"])
|
|
1753
|
+
|
|
1754
|
+
raise ConversionError(f"failed to convert Markdown file: {path} @ line {line_number}") from ex
|
|
1755
|
+
else:
|
|
1756
|
+
raise ConversionError(f"failed to convert Markdown file: {path}") from ex
|
|
1644
1757
|
except RuntimeError as ex:
|
|
1645
1758
|
raise ConversionError(f"failed to convert Markdown file: {path}") from ex
|
|
1646
1759
|
|
|
@@ -1671,40 +1784,19 @@ class ConfluenceDocument:
|
|
|
1671
1784
|
Handles the case where a generated-by info panel may be present as the first child.
|
|
1672
1785
|
"""
|
|
1673
1786
|
|
|
1674
|
-
#
|
|
1787
|
+
# find the first heading element (h1-h6) in the root
|
|
1675
1788
|
heading_pattern = re.compile(r"^h[1-6]$", re.IGNORECASE)
|
|
1676
1789
|
|
|
1677
|
-
for
|
|
1790
|
+
for child in self.root:
|
|
1678
1791
|
if not isinstance(child.tag, str):
|
|
1679
1792
|
continue
|
|
1680
1793
|
|
|
1681
1794
|
if heading_pattern.match(child.tag) is None:
|
|
1682
1795
|
continue
|
|
1683
1796
|
|
|
1684
|
-
|
|
1685
|
-
tail = child.tail
|
|
1686
|
-
|
|
1687
|
-
# Remove the heading
|
|
1688
|
-
self.root.remove(child)
|
|
1689
|
-
|
|
1690
|
-
# If there was tail text, attach it to the previous sibling's tail
|
|
1691
|
-
# or to the parent's text if this was the first child
|
|
1692
|
-
if tail:
|
|
1693
|
-
if idx > 0:
|
|
1694
|
-
# Append to previous sibling's tail
|
|
1695
|
-
prev_sibling = self.root[idx - 1]
|
|
1696
|
-
if prev_sibling.tail:
|
|
1697
|
-
prev_sibling.tail += tail
|
|
1698
|
-
else:
|
|
1699
|
-
prev_sibling.tail = tail
|
|
1700
|
-
else:
|
|
1701
|
-
# No previous sibling, append to parent's text
|
|
1702
|
-
if self.root.text:
|
|
1703
|
-
self.root.text += tail
|
|
1704
|
-
else:
|
|
1705
|
-
self.root.text = tail
|
|
1797
|
+
remove_element(child)
|
|
1706
1798
|
|
|
1707
|
-
#
|
|
1799
|
+
# only remove the FIRST heading, then stop
|
|
1708
1800
|
break
|
|
1709
1801
|
|
|
1710
1802
|
def xhtml(self) -> str:
|