markdown-to-confluence 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/METADATA +160 -11
- markdown_to_confluence-0.5.3.dist-info/RECORD +55 -0
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/licenses/LICENSE +1 -1
- md2conf/__init__.py +2 -2
- md2conf/__main__.py +94 -29
- md2conf/api.py +55 -10
- md2conf/attachment.py +72 -0
- md2conf/coalesce.py +43 -0
- md2conf/collection.py +1 -1
- md2conf/{extra.py → compatibility.py} +1 -1
- md2conf/converter.py +417 -590
- md2conf/csf.py +13 -11
- md2conf/drawio/__init__.py +0 -0
- md2conf/drawio/extension.py +116 -0
- md2conf/{drawio.py → drawio/render.py} +1 -1
- md2conf/emoticon.py +3 -3
- md2conf/environment.py +2 -2
- md2conf/extension.py +78 -0
- md2conf/external.py +49 -0
- md2conf/formatting.py +135 -0
- md2conf/frontmatter.py +70 -0
- md2conf/image.py +127 -0
- md2conf/latex.py +7 -186
- md2conf/local.py +8 -8
- md2conf/markdown.py +1 -1
- md2conf/matcher.py +1 -1
- md2conf/mermaid/__init__.py +0 -0
- md2conf/mermaid/config.py +20 -0
- md2conf/mermaid/extension.py +109 -0
- md2conf/{mermaid.py → mermaid/render.py} +10 -38
- md2conf/mermaid/scanner.py +55 -0
- md2conf/metadata.py +1 -1
- md2conf/options.py +116 -0
- md2conf/plantuml/__init__.py +0 -0
- md2conf/plantuml/config.py +20 -0
- md2conf/plantuml/extension.py +158 -0
- md2conf/plantuml/render.py +139 -0
- md2conf/plantuml/scanner.py +56 -0
- md2conf/png.py +202 -0
- md2conf/processor.py +32 -11
- md2conf/publisher.py +17 -18
- md2conf/scanner.py +31 -128
- md2conf/serializer.py +2 -2
- md2conf/svg.py +341 -0
- md2conf/text.py +1 -1
- md2conf/toc.py +1 -1
- md2conf/uri.py +1 -1
- md2conf/xml.py +1 -1
- markdown_to_confluence-0.5.1.dist-info/RECORD +0 -35
- md2conf/domain.py +0 -52
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/WHEEL +0 -0
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/zip-safe +0 -0
md2conf/converter.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Publish Markdown files to Confluence wiki.
|
|
3
3
|
|
|
4
|
-
Copyright 2022-
|
|
4
|
+
Copyright 2022-2026, Levente Hunyadi
|
|
5
5
|
|
|
6
6
|
:see: https://github.com/hunyadi/md2conf
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import
|
|
10
|
-
import enum
|
|
9
|
+
import copy
|
|
11
10
|
import hashlib
|
|
12
11
|
import logging
|
|
13
12
|
import os.path
|
|
@@ -16,24 +15,30 @@ import uuid
|
|
|
16
15
|
from abc import ABC, abstractmethod
|
|
17
16
|
from dataclasses import dataclass
|
|
18
17
|
from pathlib import Path
|
|
19
|
-
from typing import ClassVar
|
|
18
|
+
from typing import ClassVar
|
|
20
19
|
from urllib.parse import ParseResult, quote_plus, urlparse
|
|
21
20
|
|
|
22
21
|
import lxml.etree as ET
|
|
23
|
-
from cattrs import BaseValidationError
|
|
24
22
|
|
|
25
|
-
from . import
|
|
23
|
+
from .attachment import AttachmentCatalog, EmbeddedFileData, ImageData, attachment_name
|
|
24
|
+
from .coalesce import coalesce
|
|
26
25
|
from .collection import ConfluencePageCollection
|
|
26
|
+
from .compatibility import override, path_relative_to
|
|
27
27
|
from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string, normalize_inline
|
|
28
|
-
from .
|
|
28
|
+
from .drawio.extension import DrawioExtension
|
|
29
29
|
from .emoticon import emoji_to_emoticon
|
|
30
30
|
from .environment import PageError
|
|
31
|
-
from .
|
|
32
|
-
from .
|
|
31
|
+
from .extension import ExtensionOptions, MarketplaceExtension
|
|
32
|
+
from .formatting import FormattingContext, ImageAlignment, ImageAttributes
|
|
33
|
+
from .image import ImageGenerator, ImageGeneratorOptions
|
|
34
|
+
from .latex import render_latex
|
|
33
35
|
from .markdown import markdown_to_html
|
|
34
|
-
from .mermaid import
|
|
36
|
+
from .mermaid.extension import MermaidExtension
|
|
35
37
|
from .metadata import ConfluenceSiteMetadata
|
|
36
|
-
from .
|
|
38
|
+
from .options import ConfluencePageID, ConverterOptions, DocumentOptions
|
|
39
|
+
from .plantuml.extension import PlantUMLExtension
|
|
40
|
+
from .png import extract_png_dimensions, remove_png_chunks
|
|
41
|
+
from .scanner import ScannedDocument, Scanner
|
|
37
42
|
from .serializer import JsonType
|
|
38
43
|
from .toc import TableOfContentsBuilder
|
|
39
44
|
from .uri import is_absolute_url, to_uuid_urn
|
|
@@ -42,6 +47,28 @@ from .xml import element_to_text
|
|
|
42
47
|
ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
|
|
43
48
|
|
|
44
49
|
|
|
50
|
+
def apply_generated_by_template(template: str, path: Path) -> str:
    """Apply template substitution to the generated_by string.

    Supported placeholders:
    - %{filepath}: Full path to the file (relative to the source directory)
    - %{filename}: Just the filename
    - %{filedir}: Dirname of the full path to the file (relative to the source directory)
    - %{filestem}: Just the filename without the extension

    All placeholders are substituted in a single pass over the template, so text
    inserted for one placeholder is never re-scanned for another (e.g. when a file
    name itself contains the literal text `%{filestem}`). Any other `%{...}` sequence
    is left unchanged.

    :param template: The template string with placeholders
    :param path: The path to the file being converted
    :returns: The template string with placeholders replaced
    """

    substitutions = {
        "%{filepath}": path.as_posix(),
        "%{filename}": path.name,
        "%{filedir}": path.parent.as_posix(),
        "%{filestem}": path.stem,
    }

    # one alternation over the escaped placeholder names; a callable replacement
    # avoids backslash/group-reference interpretation of the substituted values
    pattern = "|".join(re.escape(placeholder) for placeholder in substitutions)
    return re.sub(pattern, lambda match: substitutions[match.group(0)], template)
|
|
70
|
+
|
|
71
|
+
|
|
45
72
|
def get_volatile_attributes() -> list[str]:
|
|
46
73
|
"Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
|
|
47
74
|
|
|
@@ -81,6 +108,12 @@ def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
|
|
|
81
108
|
return absolute_path.as_posix().startswith(base_path.as_posix())
|
|
82
109
|
|
|
83
110
|
|
|
111
|
+
def fix_absolute_path(path: Path, root_path: Path) -> Path:
    """
    Make absolute path relative to another root path.

    The anchor of `path` (drive and root, e.g. `/` or `C:\\`) is stripped and the
    remaining components are appended to `root_path`. Stripping the anchor rather
    than only the root keeps drive-qualified Windows paths from raising `ValueError`.

    :param path: The absolute path to re-root.
    :param root_path: The directory that takes the place of the original anchor.
    :returns: The components of `path` below its anchor, joined onto `root_path`.
    """

    return root_path / path.relative_to(path.anchor)
|
|
115
|
+
|
|
116
|
+
|
|
84
117
|
def encode_title(text: str) -> str:
|
|
85
118
|
"Converts a title string such that it is safe to embed into a Confluence URL."
|
|
86
119
|
|
|
@@ -95,6 +128,7 @@ def encode_title(text: str) -> str:
|
|
|
95
128
|
|
|
96
129
|
|
|
97
130
|
# supported code block languages, for which syntax highlighting is available
|
|
131
|
+
# spellchecker: disable
|
|
98
132
|
_LANGUAGES = {
|
|
99
133
|
"abap": "abap",
|
|
100
134
|
"actionscript3": "actionscript3",
|
|
@@ -137,7 +171,6 @@ _LANGUAGES = {
|
|
|
137
171
|
"kotlin": "kotlin",
|
|
138
172
|
"livescript": "livescript",
|
|
139
173
|
"lua": "lua",
|
|
140
|
-
"mermaid": "mermaid",
|
|
141
174
|
"mathematica": "mathematica",
|
|
142
175
|
"matlab": "matlab",
|
|
143
176
|
"objectivec": "objectivec",
|
|
@@ -179,6 +212,7 @@ _LANGUAGES = {
|
|
|
179
212
|
"xquery": "xquery",
|
|
180
213
|
"yaml": "yaml",
|
|
181
214
|
}
|
|
215
|
+
# spellchecker: enable
|
|
182
216
|
|
|
183
217
|
|
|
184
218
|
class NodeVisitor(ABC):
|
|
@@ -241,148 +275,6 @@ def is_placeholder_for(node: ElementType, name: str) -> bool:
|
|
|
241
275
|
return True
|
|
242
276
|
|
|
243
277
|
|
|
244
|
-
@enum.unique
|
|
245
|
-
class FormattingContext(enum.Enum):
|
|
246
|
-
"Identifies the formatting context for the element."
|
|
247
|
-
|
|
248
|
-
BLOCK = "block"
|
|
249
|
-
INLINE = "inline"
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
@enum.unique
|
|
253
|
-
class ImageAlignment(enum.Enum):
|
|
254
|
-
"Determines whether to align block-level images to center, left or right."
|
|
255
|
-
|
|
256
|
-
CENTER = "center"
|
|
257
|
-
LEFT = "left"
|
|
258
|
-
RIGHT = "right"
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
@dataclass
|
|
262
|
-
class ImageAttributes:
|
|
263
|
-
"""
|
|
264
|
-
Attributes applied to an `<img>` element.
|
|
265
|
-
|
|
266
|
-
:param context: Identifies the formatting context for the element (block or inline).
|
|
267
|
-
:param width: Natural image width in pixels.
|
|
268
|
-
:param height: Natural image height in pixels.
|
|
269
|
-
:param alt: Alternate text.
|
|
270
|
-
:param title: Title text (a.k.a. image tooltip).
|
|
271
|
-
:param caption: Caption text (shown below figure).
|
|
272
|
-
:param alignment: Alignment for block-level images.
|
|
273
|
-
"""
|
|
274
|
-
|
|
275
|
-
context: FormattingContext
|
|
276
|
-
width: int | None
|
|
277
|
-
height: int | None
|
|
278
|
-
alt: str | None
|
|
279
|
-
title: str | None
|
|
280
|
-
caption: str | None
|
|
281
|
-
alignment: ImageAlignment = ImageAlignment.CENTER
|
|
282
|
-
|
|
283
|
-
def __post_init__(self) -> None:
|
|
284
|
-
if self.caption is None and self.context is FormattingContext.BLOCK:
|
|
285
|
-
self.caption = self.title or self.alt
|
|
286
|
-
|
|
287
|
-
def as_dict(self) -> dict[str, str]:
|
|
288
|
-
attributes: dict[str, str] = {}
|
|
289
|
-
if self.context is FormattingContext.BLOCK:
|
|
290
|
-
if self.alignment is ImageAlignment.LEFT:
|
|
291
|
-
attributes[AC_ATTR("align")] = "left"
|
|
292
|
-
attributes[AC_ATTR("layout")] = "align-start"
|
|
293
|
-
elif self.alignment is ImageAlignment.RIGHT:
|
|
294
|
-
attributes[AC_ATTR("align")] = "right"
|
|
295
|
-
attributes[AC_ATTR("layout")] = "align-end"
|
|
296
|
-
else:
|
|
297
|
-
attributes[AC_ATTR("align")] = "center"
|
|
298
|
-
attributes[AC_ATTR("layout")] = "center"
|
|
299
|
-
|
|
300
|
-
if self.width is not None:
|
|
301
|
-
attributes[AC_ATTR("original-width")] = str(self.width)
|
|
302
|
-
if self.height is not None:
|
|
303
|
-
attributes[AC_ATTR("original-height")] = str(self.height)
|
|
304
|
-
if self.width is not None:
|
|
305
|
-
attributes[AC_ATTR("custom-width")] = "true"
|
|
306
|
-
attributes[AC_ATTR("width")] = str(self.width)
|
|
307
|
-
|
|
308
|
-
elif self.context is FormattingContext.INLINE:
|
|
309
|
-
if self.width is not None:
|
|
310
|
-
attributes[AC_ATTR("width")] = str(self.width)
|
|
311
|
-
if self.height is not None:
|
|
312
|
-
attributes[AC_ATTR("height")] = str(self.height)
|
|
313
|
-
else:
|
|
314
|
-
raise NotImplementedError("match not exhaustive for enumeration")
|
|
315
|
-
|
|
316
|
-
if self.alt is not None:
|
|
317
|
-
attributes.update({AC_ATTR("alt"): self.alt})
|
|
318
|
-
if self.title is not None:
|
|
319
|
-
attributes.update({AC_ATTR("title"): self.title})
|
|
320
|
-
return attributes
|
|
321
|
-
|
|
322
|
-
EMPTY_BLOCK: ClassVar["ImageAttributes"]
|
|
323
|
-
EMPTY_INLINE: ClassVar["ImageAttributes"]
|
|
324
|
-
|
|
325
|
-
@classmethod
|
|
326
|
-
def empty(cls, context: FormattingContext) -> "ImageAttributes":
|
|
327
|
-
if context is FormattingContext.BLOCK:
|
|
328
|
-
return cls.EMPTY_BLOCK
|
|
329
|
-
elif context is FormattingContext.INLINE:
|
|
330
|
-
return cls.EMPTY_INLINE
|
|
331
|
-
else:
|
|
332
|
-
raise NotImplementedError("match not exhaustive for enumeration")
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
ImageAttributes.EMPTY_BLOCK = ImageAttributes(
|
|
336
|
-
FormattingContext.BLOCK, width=None, height=None, alt=None, title=None, caption=None, alignment=ImageAlignment.CENTER
|
|
337
|
-
)
|
|
338
|
-
ImageAttributes.EMPTY_INLINE = ImageAttributes(
|
|
339
|
-
FormattingContext.INLINE, width=None, height=None, alt=None, title=None, caption=None, alignment=ImageAlignment.CENTER
|
|
340
|
-
)
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
@dataclass
|
|
344
|
-
class ConfluenceConverterOptions:
|
|
345
|
-
"""
|
|
346
|
-
Options for converting an HTML tree into Confluence storage format.
|
|
347
|
-
|
|
348
|
-
:param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
|
|
349
|
-
plain text; when false, raise an exception.
|
|
350
|
-
:param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
|
|
351
|
-
conversion rules for the identifier.
|
|
352
|
-
:param prefer_raster: Whether to choose PNG files over SVG files when available.
|
|
353
|
-
:param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
|
|
354
|
-
:param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
|
|
355
|
-
:param render_latex: Whether to pre-render LaTeX formulas into PNG/SVG images.
|
|
356
|
-
:param diagram_output_format: Target image format for diagrams.
|
|
357
|
-
:param webui_links: When true, convert relative URLs to Confluence Web UI links.
|
|
358
|
-
:param alignment: Alignment for block-level images and formulas.
|
|
359
|
-
:param use_panel: Whether to transform admonitions and alerts into a Confluence custom panel.
|
|
360
|
-
"""
|
|
361
|
-
|
|
362
|
-
ignore_invalid_url: bool = False
|
|
363
|
-
heading_anchors: bool = False
|
|
364
|
-
prefer_raster: bool = True
|
|
365
|
-
render_drawio: bool = False
|
|
366
|
-
render_mermaid: bool = False
|
|
367
|
-
render_latex: bool = False
|
|
368
|
-
diagram_output_format: Literal["png", "svg"] = "png"
|
|
369
|
-
webui_links: bool = False
|
|
370
|
-
alignment: Literal["center", "left", "right"] = "center"
|
|
371
|
-
use_panel: bool = False
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
@dataclass
|
|
375
|
-
class ImageData:
|
|
376
|
-
path: Path
|
|
377
|
-
description: str | None = None
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
@dataclass
|
|
381
|
-
class EmbeddedFileData:
|
|
382
|
-
data: bytes
|
|
383
|
-
description: str | None = None
|
|
384
|
-
|
|
385
|
-
|
|
386
278
|
@dataclass
|
|
387
279
|
class ConfluencePanel:
|
|
388
280
|
emoji: str
|
|
@@ -423,20 +315,22 @@ ConfluencePanel.from_class = {
|
|
|
423
315
|
class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
424
316
|
"Transforms a plain HTML tree into Confluence Storage Format."
|
|
425
317
|
|
|
426
|
-
options:
|
|
318
|
+
options: ConverterOptions
|
|
427
319
|
path: Path
|
|
428
320
|
base_dir: Path
|
|
429
321
|
root_dir: Path
|
|
430
322
|
toc: TableOfContentsBuilder
|
|
431
323
|
links: list[str]
|
|
432
|
-
|
|
433
|
-
embedded_files: dict[str, EmbeddedFileData]
|
|
324
|
+
attachments: AttachmentCatalog
|
|
434
325
|
site_metadata: ConfluenceSiteMetadata
|
|
435
326
|
page_metadata: ConfluencePageCollection
|
|
436
327
|
|
|
328
|
+
image_generator: ImageGenerator
|
|
329
|
+
extensions: list[MarketplaceExtension]
|
|
330
|
+
|
|
437
331
|
def __init__(
|
|
438
332
|
self,
|
|
439
|
-
options:
|
|
333
|
+
options: ConverterOptions,
|
|
440
334
|
path: Path,
|
|
441
335
|
root_dir: Path,
|
|
442
336
|
site_metadata: ConfluenceSiteMetadata,
|
|
@@ -453,11 +347,22 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
453
347
|
self.root_dir = root_dir
|
|
454
348
|
self.toc = TableOfContentsBuilder()
|
|
455
349
|
self.links = []
|
|
456
|
-
self.
|
|
457
|
-
self.embedded_files = {}
|
|
350
|
+
self.attachments = AttachmentCatalog()
|
|
458
351
|
self.site_metadata = site_metadata
|
|
459
352
|
self.page_metadata = page_metadata
|
|
460
353
|
|
|
354
|
+
self.image_generator = ImageGenerator(
|
|
355
|
+
self.base_dir,
|
|
356
|
+
self.attachments,
|
|
357
|
+
ImageGeneratorOptions(self.options.diagram_output_format, self.options.prefer_raster, self.options.layout.image.max_width),
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
self.extensions = [
|
|
361
|
+
DrawioExtension(self.image_generator, ExtensionOptions(render=self.options.render_drawio)),
|
|
362
|
+
MermaidExtension(self.image_generator, ExtensionOptions(render=self.options.render_mermaid)),
|
|
363
|
+
PlantUMLExtension(self.image_generator, ExtensionOptions(render=self.options.render_plantuml)),
|
|
364
|
+
]
|
|
365
|
+
|
|
461
366
|
def _transform_heading(self, heading: ElementType) -> None:
|
|
462
367
|
"""
|
|
463
368
|
Adds anchors to headings in the same document (if *heading anchors* is enabled).
|
|
@@ -545,9 +450,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
545
450
|
|
|
546
451
|
# discard original value: relative links always require transformation
|
|
547
452
|
anchor.attrib.pop("href")
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
453
|
+
if relative_url.path.startswith("/"):
|
|
454
|
+
absolute_path = fix_absolute_path(path=Path(relative_url.path), root_path=self.root_dir).resolve()
|
|
455
|
+
else:
|
|
456
|
+
# convert the relative URL to absolute path based on the base path value
|
|
457
|
+
absolute_path = (self.base_dir / relative_url.path).resolve()
|
|
551
458
|
|
|
552
459
|
# look up the absolute path in the page metadata dictionary to discover the relative path within Confluence that should be used
|
|
553
460
|
if not is_directory_within(absolute_path, self.root_dir):
|
|
@@ -606,7 +513,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
606
513
|
return None
|
|
607
514
|
|
|
608
515
|
file_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
|
|
609
|
-
self.
|
|
516
|
+
self.attachments.add_image(ImageData(absolute_path))
|
|
610
517
|
|
|
611
518
|
link_body = AC_ELEM("link-body", {}, *list(anchor))
|
|
612
519
|
link_body.text = anchor.text
|
|
@@ -668,7 +575,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
668
575
|
pixel_width = int(width) if width is not None and width.isdecimal() else None
|
|
669
576
|
pixel_height = int(height) if height is not None and height.isdecimal() else None
|
|
670
577
|
attrs = ImageAttributes(
|
|
671
|
-
context,
|
|
578
|
+
context,
|
|
579
|
+
width=pixel_width,
|
|
580
|
+
height=pixel_height,
|
|
581
|
+
alt=alt,
|
|
582
|
+
title=title,
|
|
583
|
+
caption=None,
|
|
584
|
+
alignment=ImageAlignment(self.options.layout.get_image_alignment()),
|
|
672
585
|
)
|
|
673
586
|
|
|
674
587
|
if is_absolute_url(src):
|
|
@@ -680,14 +593,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
680
593
|
if absolute_path is None:
|
|
681
594
|
return self._create_missing(path, attrs)
|
|
682
595
|
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
return self._transform_external_mermaid(absolute_path, attrs)
|
|
689
|
-
else:
|
|
690
|
-
return self._transform_attached_image(absolute_path, attrs)
|
|
596
|
+
for extension in self.extensions:
|
|
597
|
+
if extension.matches_image(absolute_path):
|
|
598
|
+
return extension.transform_image(absolute_path, attrs)
|
|
599
|
+
|
|
600
|
+
return self.image_generator.transform_attached_image(absolute_path, attrs)
|
|
691
601
|
|
|
692
602
|
def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ElementType:
|
|
693
603
|
"Emits Confluence Storage Format XHTML for an external image."
|
|
@@ -703,7 +613,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
703
613
|
if attrs.caption:
|
|
704
614
|
elements.append(AC_ELEM("caption", attrs.caption))
|
|
705
615
|
|
|
706
|
-
return AC_ELEM("image", attrs.as_dict(), *elements)
|
|
616
|
+
return AC_ELEM("image", attrs.as_dict(max_width=self.options.layout.image.max_width), *elements)
|
|
707
617
|
|
|
708
618
|
def _warn_or_raise(self, msg: str) -> None:
|
|
709
619
|
"Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
|
|
@@ -716,8 +626,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
716
626
|
def _verify_image_path(self, path: Path) -> Path | None:
|
|
717
627
|
"Checks whether an image path is safe to use."
|
|
718
628
|
|
|
719
|
-
|
|
720
|
-
|
|
629
|
+
if path.is_absolute():
|
|
630
|
+
absolute_path = fix_absolute_path(path=path, root_path=self.root_dir).resolve()
|
|
631
|
+
else:
|
|
632
|
+
# resolve relative path into absolute path w.r.t. base dir
|
|
633
|
+
absolute_path = (self.base_dir / path).resolve()
|
|
721
634
|
|
|
722
635
|
if not absolute_path.exists():
|
|
723
636
|
self._warn_or_raise(f"path to image {path} does not exist")
|
|
@@ -729,117 +642,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
729
642
|
|
|
730
643
|
return absolute_path
|
|
731
644
|
|
|
732
|
-
def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
|
|
733
|
-
"Emits Confluence Storage Format XHTML for an attached raster or vector image."
|
|
734
|
-
|
|
735
|
-
if self.options.prefer_raster and absolute_path.suffix == ".svg":
|
|
736
|
-
# prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
|
|
737
|
-
png_file = absolute_path.with_suffix(".png")
|
|
738
|
-
if png_file.exists():
|
|
739
|
-
absolute_path = png_file
|
|
740
|
-
|
|
741
|
-
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
742
|
-
image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
|
|
743
|
-
return self._create_attached_image(image_name, attrs)
|
|
744
|
-
|
|
745
|
-
def _transform_drawio(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
|
|
746
|
-
"Emits Confluence Storage Format XHTML for a draw.io diagram."
|
|
747
|
-
|
|
748
|
-
if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
|
|
749
|
-
raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
|
|
750
|
-
|
|
751
|
-
relative_path = path_relative_to(absolute_path, self.base_dir)
|
|
752
|
-
if self.options.render_drawio:
|
|
753
|
-
image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
|
|
754
|
-
image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
|
|
755
|
-
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
756
|
-
return self._create_attached_image(image_filename, attrs)
|
|
757
|
-
else:
|
|
758
|
-
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
759
|
-
image_filename = attachment_name(relative_path)
|
|
760
|
-
return self._create_drawio(image_filename, attrs)
|
|
761
|
-
|
|
762
|
-
def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
|
|
763
|
-
"Emits Confluence Storage Format XHTML for a draw.io diagram embedded in a PNG or SVG image."
|
|
764
|
-
|
|
765
|
-
if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
|
|
766
|
-
raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
|
|
767
|
-
|
|
768
|
-
if self.options.render_drawio:
|
|
769
|
-
return self._transform_attached_image(absolute_path, attrs)
|
|
770
|
-
else:
|
|
771
|
-
# extract embedded editable diagram and upload as *.drawio
|
|
772
|
-
image_data = drawio.extract_diagram(absolute_path)
|
|
773
|
-
image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
|
|
774
|
-
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
775
|
-
|
|
776
|
-
return self._create_drawio(image_filename, attrs)
|
|
777
|
-
|
|
778
|
-
def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ElementType:
|
|
779
|
-
"An image embedded into the page, linking to an attachment."
|
|
780
|
-
|
|
781
|
-
elements: list[ElementType] = []
|
|
782
|
-
elements.append(
|
|
783
|
-
RI_ELEM(
|
|
784
|
-
"attachment",
|
|
785
|
-
# refers to an attachment uploaded alongside the page
|
|
786
|
-
{RI_ATTR("filename"): image_name},
|
|
787
|
-
)
|
|
788
|
-
)
|
|
789
|
-
if attrs.caption:
|
|
790
|
-
elements.append(AC_ELEM("caption", attrs.caption))
|
|
791
|
-
|
|
792
|
-
return AC_ELEM("image", attrs.as_dict(), *elements)
|
|
793
|
-
|
|
794
|
-
def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ElementType:
|
|
795
|
-
"A draw.io diagram embedded into the page, linking to an attachment."
|
|
796
|
-
|
|
797
|
-
parameters: list[ElementType] = [
|
|
798
|
-
AC_ELEM(
|
|
799
|
-
"parameter",
|
|
800
|
-
{AC_ATTR("name"): "diagramName"},
|
|
801
|
-
filename,
|
|
802
|
-
),
|
|
803
|
-
]
|
|
804
|
-
if attrs.width is not None:
|
|
805
|
-
parameters.append(
|
|
806
|
-
AC_ELEM(
|
|
807
|
-
"parameter",
|
|
808
|
-
{AC_ATTR("name"): "width"},
|
|
809
|
-
str(attrs.width),
|
|
810
|
-
),
|
|
811
|
-
)
|
|
812
|
-
if attrs.height is not None:
|
|
813
|
-
parameters.append(
|
|
814
|
-
AC_ELEM(
|
|
815
|
-
"parameter",
|
|
816
|
-
{AC_ATTR("name"): "height"},
|
|
817
|
-
str(attrs.height),
|
|
818
|
-
),
|
|
819
|
-
)
|
|
820
|
-
if attrs.alignment is ImageAlignment.CENTER:
|
|
821
|
-
parameters.append(
|
|
822
|
-
AC_ELEM(
|
|
823
|
-
"parameter",
|
|
824
|
-
{AC_ATTR("name"): "pCenter"},
|
|
825
|
-
str(1),
|
|
826
|
-
),
|
|
827
|
-
)
|
|
828
|
-
|
|
829
|
-
local_id = str(uuid.uuid4())
|
|
830
|
-
macro_id = str(uuid.uuid4())
|
|
831
|
-
return AC_ELEM(
|
|
832
|
-
"structured-macro",
|
|
833
|
-
{
|
|
834
|
-
AC_ATTR("name"): "drawio",
|
|
835
|
-
AC_ATTR("schema-version"): "1",
|
|
836
|
-
"data-layout": "default",
|
|
837
|
-
AC_ATTR("local-id"): local_id,
|
|
838
|
-
AC_ATTR("macro-id"): macro_id,
|
|
839
|
-
},
|
|
840
|
-
*parameters,
|
|
841
|
-
)
|
|
842
|
-
|
|
843
645
|
def _create_missing(self, path: Path, attrs: ImageAttributes) -> ElementType:
|
|
844
646
|
"A warning panel for a missing image."
|
|
845
647
|
|
|
@@ -871,6 +673,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
871
673
|
def _transform_code_block(self, code: ElementType) -> ElementType:
|
|
872
674
|
"Transforms a code block."
|
|
873
675
|
|
|
676
|
+
content: str = code.text or ""
|
|
677
|
+
content = content.rstrip()
|
|
678
|
+
|
|
874
679
|
if language_class := code.get("class"):
|
|
875
680
|
if m := re.match("^language-(.*)$", language_class):
|
|
876
681
|
language_name = m.group(1)
|
|
@@ -881,16 +686,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
881
686
|
|
|
882
687
|
# translate name to standard name for (programming) language
|
|
883
688
|
if language_name is not None:
|
|
689
|
+
for extension in self.extensions:
|
|
690
|
+
if extension.matches_fenced(language_name, content):
|
|
691
|
+
return extension.transform_fenced(content)
|
|
692
|
+
|
|
884
693
|
language_id = _LANGUAGES.get(language_name)
|
|
885
694
|
else:
|
|
886
695
|
language_id = None
|
|
887
696
|
|
|
888
|
-
content: str = code.text or ""
|
|
889
|
-
content = content.rstrip()
|
|
890
|
-
|
|
891
|
-
if language_id == "mermaid":
|
|
892
|
-
return self._transform_fenced_mermaid(content)
|
|
893
|
-
|
|
894
697
|
return AC_ELEM(
|
|
895
698
|
"structured-macro",
|
|
896
699
|
{
|
|
@@ -905,84 +708,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
905
708
|
AC_ELEM("plain-text-body", ET.CDATA(content)),
|
|
906
709
|
)
|
|
907
710
|
|
|
908
|
-
def _extract_mermaid_config(self, content: str) -> MermaidConfigProperties | None:
|
|
909
|
-
"""Extract scale from Mermaid YAML front matter configuration."""
|
|
910
|
-
try:
|
|
911
|
-
properties = MermaidScanner().read(content)
|
|
912
|
-
return properties.config
|
|
913
|
-
except BaseValidationError as ex:
|
|
914
|
-
LOGGER.warning("Failed to extract Mermaid properties: %s", ex)
|
|
915
|
-
return None
|
|
916
|
-
|
|
917
|
-
def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
|
|
918
|
-
"Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
|
|
919
|
-
|
|
920
|
-
if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
|
|
921
|
-
raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
|
|
922
|
-
|
|
923
|
-
relative_path = path_relative_to(absolute_path, self.base_dir)
|
|
924
|
-
if self.options.render_mermaid:
|
|
925
|
-
with open(absolute_path, "r", encoding="utf-8") as f:
|
|
926
|
-
content = f.read()
|
|
927
|
-
config = self._extract_mermaid_config(content)
|
|
928
|
-
image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
|
|
929
|
-
image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
|
|
930
|
-
self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
|
|
931
|
-
return self._create_attached_image(image_filename, attrs)
|
|
932
|
-
else:
|
|
933
|
-
self.images.append(ImageData(absolute_path, attrs.alt))
|
|
934
|
-
mermaid_filename = attachment_name(relative_path)
|
|
935
|
-
return self._create_mermaid_embed(mermaid_filename)
|
|
936
|
-
|
|
937
|
-
def _transform_fenced_mermaid(self, content: str) -> ElementType:
|
|
938
|
-
"Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a fenced code block."
|
|
939
|
-
|
|
940
|
-
if self.options.render_mermaid:
|
|
941
|
-
config = self._extract_mermaid_config(content)
|
|
942
|
-
image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
|
|
943
|
-
image_hash = hashlib.md5(image_data).hexdigest()
|
|
944
|
-
image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
|
|
945
|
-
self.embedded_files[image_filename] = EmbeddedFileData(image_data)
|
|
946
|
-
return self._create_attached_image(image_filename, ImageAttributes.EMPTY_BLOCK)
|
|
947
|
-
else:
|
|
948
|
-
mermaid_data = content.encode("utf-8")
|
|
949
|
-
mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
|
|
950
|
-
mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
|
|
951
|
-
self.embedded_files[mermaid_filename] = EmbeddedFileData(mermaid_data)
|
|
952
|
-
return self._create_mermaid_embed(mermaid_filename)
|
|
953
|
-
|
|
954
|
-
def _create_mermaid_embed(self, filename: str) -> ElementType:
|
|
955
|
-
"A Mermaid diagram, linking to an attachment that captures the Mermaid source."
|
|
956
|
-
|
|
957
|
-
local_id = str(uuid.uuid4())
|
|
958
|
-
macro_id = str(uuid.uuid4())
|
|
959
|
-
return AC_ELEM(
|
|
960
|
-
"structured-macro",
|
|
961
|
-
{
|
|
962
|
-
AC_ATTR("name"): "mermaid-cloud",
|
|
963
|
-
AC_ATTR("schema-version"): "1",
|
|
964
|
-
"data-layout": "default",
|
|
965
|
-
AC_ATTR("local-id"): local_id,
|
|
966
|
-
AC_ATTR("macro-id"): macro_id,
|
|
967
|
-
},
|
|
968
|
-
AC_ELEM(
|
|
969
|
-
"parameter",
|
|
970
|
-
{AC_ATTR("name"): "filename"},
|
|
971
|
-
filename,
|
|
972
|
-
),
|
|
973
|
-
AC_ELEM(
|
|
974
|
-
"parameter",
|
|
975
|
-
{AC_ATTR("name"): "toolbar"},
|
|
976
|
-
"bottom",
|
|
977
|
-
),
|
|
978
|
-
AC_ELEM(
|
|
979
|
-
"parameter",
|
|
980
|
-
{AC_ATTR("name"): "zoom"},
|
|
981
|
-
"fit",
|
|
982
|
-
),
|
|
983
|
-
AC_ELEM("parameter", {AC_ATTR("name"): "revision"}, "1"),
|
|
984
|
-
)
|
|
985
|
-
|
|
986
711
|
def _transform_toc(self, code: ElementType) -> ElementType:
|
|
987
712
|
"Creates a table of contents, constructed from headings in the document."
|
|
988
713
|
|
|
@@ -1299,16 +1024,24 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1299
1024
|
|
|
1300
1025
|
image_data = render_latex(content, format=self.options.diagram_output_format)
|
|
1301
1026
|
if self.options.diagram_output_format == "png":
|
|
1302
|
-
width, height =
|
|
1027
|
+
width, height = extract_png_dimensions(data=image_data)
|
|
1303
1028
|
image_data = remove_png_chunks(["pHYs"], source_data=image_data)
|
|
1304
|
-
attrs = ImageAttributes(
|
|
1029
|
+
attrs = ImageAttributes(
|
|
1030
|
+
context,
|
|
1031
|
+
width=width,
|
|
1032
|
+
height=height,
|
|
1033
|
+
alt=content,
|
|
1034
|
+
title=None,
|
|
1035
|
+
caption="",
|
|
1036
|
+
alignment=ImageAlignment(self.options.layout.get_image_alignment()),
|
|
1037
|
+
)
|
|
1305
1038
|
else:
|
|
1306
1039
|
attrs = ImageAttributes.empty(context)
|
|
1307
1040
|
|
|
1308
1041
|
image_hash = hashlib.md5(image_data).hexdigest()
|
|
1309
1042
|
image_filename = attachment_name(f"formula_{image_hash}.{self.options.diagram_output_format}")
|
|
1310
|
-
self.
|
|
1311
|
-
image = self.
|
|
1043
|
+
self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, content))
|
|
1044
|
+
image = self.image_generator.create_attached_image(image_filename, attrs)
|
|
1312
1045
|
return image
|
|
1313
1046
|
|
|
1314
1047
|
def _transform_inline_math(self, elem: ElementType) -> ElementType:
|
|
@@ -1342,7 +1075,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1342
1075
|
{AC_ATTR("name"): "body"},
|
|
1343
1076
|
content,
|
|
1344
1077
|
),
|
|
1345
|
-
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.
|
|
1078
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.layout.get_image_alignment()),
|
|
1346
1079
|
)
|
|
1347
1080
|
return macro
|
|
1348
1081
|
|
|
@@ -1379,15 +1112,23 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1379
1112
|
{AC_ATTR("name"): "body"},
|
|
1380
1113
|
content,
|
|
1381
1114
|
),
|
|
1382
|
-
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.
|
|
1115
|
+
AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.layout.get_image_alignment()),
|
|
1383
1116
|
)
|
|
1384
1117
|
|
|
1385
1118
|
def _transform_footnote_ref(self, elem: ElementType) -> None:
|
|
1386
1119
|
"""
|
|
1387
1120
|
Transforms a footnote reference.
|
|
1388
1121
|
|
|
1122
|
+
When a footnote is referenced multiple times, Python-Markdown generates
|
|
1123
|
+
different `id` attributes for each reference:
|
|
1124
|
+
- First reference: `fnref:NAME`
|
|
1125
|
+
- Second reference: `fnref2:NAME`
|
|
1126
|
+
- Third reference: `fnref3:NAME`
|
|
1127
|
+
- etc.
|
|
1128
|
+
|
|
1389
1129
|
```
|
|
1390
1130
|
<sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
|
|
1131
|
+
<sup id="fnref2:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
|
|
1391
1132
|
```
|
|
1392
1133
|
"""
|
|
1393
1134
|
|
|
@@ -1395,9 +1136,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1395
1136
|
raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
|
|
1396
1137
|
|
|
1397
1138
|
ref_id = elem.attrib.pop("id", "")
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1139
|
+
# Match fnref:NAME, fnref2:NAME, fnref3:NAME, etc.
|
|
1140
|
+
match = re.match(r"^fnref(\d*):(.+)$", ref_id)
|
|
1141
|
+
if match is None:
|
|
1142
|
+
raise DocumentError("expected: attribute `id` of format `fnref:NAME` or `fnrefN:NAME` applied on `<sup>` for a footnote reference")
|
|
1143
|
+
numeric_suffix = match.group(1)
|
|
1144
|
+
footnote_name = match.group(2)
|
|
1145
|
+
# Build anchor name: first reference uses NAME, subsequent references use NAME-N
|
|
1146
|
+
footnote_ref = f"{footnote_name}-{numeric_suffix}" if numeric_suffix else footnote_name
|
|
1401
1147
|
|
|
1402
1148
|
link = next((elem.iterchildren(tag="a")), None)
|
|
1403
1149
|
if link is None:
|
|
@@ -1443,6 +1189,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1443
1189
|
"""
|
|
1444
1190
|
Transforms the footnote definition block.
|
|
1445
1191
|
|
|
1192
|
+
When a footnote is referenced multiple times, Python-Markdown generates
|
|
1193
|
+
multiple back-reference links in the footnote definition:
|
|
1194
|
+
- First reference: `#fnref:NAME`
|
|
1195
|
+
- Second reference: `#fnref2:NAME`
|
|
1196
|
+
- Third reference: `#fnref3:NAME`
|
|
1197
|
+
- etc.
|
|
1198
|
+
|
|
1446
1199
|
```
|
|
1447
1200
|
<div class="footnote">
|
|
1448
1201
|
<hr/>
|
|
@@ -1453,6 +1206,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1453
1206
|
</ol>
|
|
1454
1207
|
</div>
|
|
1455
1208
|
```
|
|
1209
|
+
|
|
1210
|
+
With multiple references to the same footnote:
|
|
1211
|
+
```
|
|
1212
|
+
<li id="fn:NAME">
|
|
1213
|
+
<p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a><a class="footnote-backref" href="#fnref2:NAME">↩</a></p>
|
|
1214
|
+
</li>
|
|
1215
|
+
```
|
|
1456
1216
|
"""
|
|
1457
1217
|
|
|
1458
1218
|
ordered_list = next((elem.iterchildren(tag="ol")), None)
|
|
@@ -1468,21 +1228,33 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1468
1228
|
raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
|
|
1469
1229
|
footnote_def = def_id.removeprefix("fn:")
|
|
1470
1230
|
|
|
1471
|
-
|
|
1472
|
-
|
|
1231
|
+
# find the last paragraph, which is where the backref links are placed
|
|
1232
|
+
paragraphs = list(list_item.iterchildren(tag="p"))
|
|
1233
|
+
if not paragraphs:
|
|
1473
1234
|
raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
|
|
1235
|
+
last_paragraph = paragraphs[-1]
|
|
1236
|
+
|
|
1237
|
+
# collect all backref anchors (there may be multiple when a footnote is referenced multiple times)
|
|
1238
|
+
# pattern matches #fnref:NAME, #fnref2:NAME, #fnref3:NAME, etc.
|
|
1239
|
+
# store tuples of (anchor_element, number, footnote_name)
|
|
1240
|
+
backref_info: list[tuple[ElementType, int | None, str]] = []
|
|
1241
|
+
for anchor in list(last_paragraph.iterchildren(tag="a")):
|
|
1242
|
+
href = anchor.get("href", "")
|
|
1243
|
+
match = re.match(r"^#fnref(\d*):(.+)$", href)
|
|
1244
|
+
if match is not None:
|
|
1245
|
+
backref_info.append((anchor, int(match.group(1), base=10) if match.group(1) else None, match.group(2)))
|
|
1246
|
+
|
|
1247
|
+
if not backref_info:
|
|
1248
|
+
raise DocumentError(
|
|
1249
|
+
"expected: at least one `<a>` element with `href` attribute of format `#fnref:NAME` or `#fnrefN:NAME` in a footnote definition"
|
|
1250
|
+
)
|
|
1474
1251
|
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
ref_href = ref_anchor.get("href", "")
|
|
1480
|
-
if not ref_href.startswith("#fnref:"):
|
|
1481
|
-
raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
|
|
1482
|
-
footnote_ref = ref_href.removeprefix("#fnref:")
|
|
1252
|
+
# remove all back-links generated by Python-Markdown
|
|
1253
|
+
for anchor, _, _ in backref_info:
|
|
1254
|
+
last_paragraph.remove(anchor)
|
|
1483
1255
|
|
|
1484
|
-
#
|
|
1485
|
-
|
|
1256
|
+
# use the first paragraph for the anchor placement
|
|
1257
|
+
first_paragraph = paragraphs[0]
|
|
1486
1258
|
|
|
1487
1259
|
# build new anchor for footnote definition
|
|
1488
1260
|
def_anchor = AC_ELEM(
|
|
@@ -1498,20 +1270,40 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1498
1270
|
),
|
|
1499
1271
|
)
|
|
1500
1272
|
|
|
1501
|
-
# build
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1273
|
+
# build back-links to each footnote reference in page body:
|
|
1274
|
+
# * for single reference: ↩
|
|
1275
|
+
# * for multiple references: ↩¹ ↩² ↩³ ...
|
|
1276
|
+
for _, number, footnote_name in backref_info:
|
|
1277
|
+
# build anchor name matching the reference anchor:
|
|
1278
|
+
# * first reference: footnote-ref-NAME
|
|
1279
|
+
# * subsequent references: footnote-ref-NAME-N
|
|
1280
|
+
if number is None:
|
|
1281
|
+
anchor_name = f"footnote-ref-{footnote_name}"
|
|
1282
|
+
if len(backref_info) > 1:
|
|
1283
|
+
link_text = "↩¹"
|
|
1284
|
+
else:
|
|
1285
|
+
link_text = "↩"
|
|
1286
|
+
else:
|
|
1287
|
+
anchor_name = f"footnote-ref-{footnote_name}-{number}"
|
|
1288
|
+
|
|
1289
|
+
# use superscript numbers for references
|
|
1290
|
+
superscript_digits = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
|
|
1291
|
+
link_text = f"↩{str(number).translate(superscript_digits)}"
|
|
1292
|
+
|
|
1293
|
+
ref_link = AC_ELEM(
|
|
1294
|
+
"link",
|
|
1295
|
+
{
|
|
1296
|
+
AC_ATTR("anchor"): anchor_name,
|
|
1297
|
+
},
|
|
1298
|
+
AC_ELEM("link-body", ET.CDATA(link_text)),
|
|
1299
|
+
)
|
|
1300
|
+
|
|
1301
|
+
last_paragraph.append(ref_link)
|
|
1509
1302
|
|
|
1510
|
-
# append
|
|
1511
|
-
|
|
1512
|
-
def_anchor.tail =
|
|
1513
|
-
|
|
1514
|
-
paragraph.append(ref_link)
|
|
1303
|
+
# append anchor to first paragraph
|
|
1304
|
+
first_paragraph.insert(0, def_anchor)
|
|
1305
|
+
def_anchor.tail = first_paragraph.text
|
|
1306
|
+
first_paragraph.text = None
|
|
1515
1307
|
|
|
1516
1308
|
def _transform_tasklist(self, elem: ElementType) -> ElementType:
|
|
1517
1309
|
"""
|
|
@@ -1572,161 +1364,174 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
1572
1364
|
if not isinstance(child.tag, str):
|
|
1573
1365
|
return None
|
|
1574
1366
|
|
|
1575
|
-
|
|
1576
|
-
if child.tag == "p":
|
|
1577
|
-
# <p><img src="..." /></p>
|
|
1578
|
-
if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
|
|
1579
|
-
return self._transform_image(FormattingContext.BLOCK, child[0])
|
|
1580
|
-
|
|
1581
|
-
# <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
|
|
1582
|
-
elif is_placeholder_for(child, "TOC"):
|
|
1583
|
-
return self._transform_toc(child)
|
|
1584
|
-
|
|
1585
|
-
# <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
|
|
1586
|
-
elif is_placeholder_for(child, "LISTING"):
|
|
1587
|
-
return self._transform_listing(child)
|
|
1588
|
-
|
|
1589
|
-
# <div>...</div>
|
|
1590
|
-
elif child.tag == "div":
|
|
1591
|
-
classes = child.get("class", "").split(" ")
|
|
1592
|
-
|
|
1593
|
-
# <div class="arithmatex">...</div>
|
|
1594
|
-
if "arithmatex" in classes:
|
|
1595
|
-
return self._transform_block_math(child)
|
|
1596
|
-
|
|
1597
|
-
# <div><ac:structured-macro ...>...</ac:structured-macro></div>
|
|
1598
|
-
elif "csf" in classes:
|
|
1599
|
-
if len(child) != 1:
|
|
1600
|
-
raise DocumentError("expected: single child in Confluence Storage Format block")
|
|
1601
|
-
|
|
1602
|
-
return child[0]
|
|
1603
|
-
|
|
1604
|
-
# <div class="footnote">
|
|
1605
|
-
# <hr/>
|
|
1606
|
-
# <ol>
|
|
1607
|
-
# <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
|
|
1608
|
-
# </ol>
|
|
1609
|
-
# </div>
|
|
1610
|
-
elif "footnote" in classes:
|
|
1611
|
-
self._transform_footnote_def(child)
|
|
1612
|
-
return None
|
|
1613
|
-
|
|
1614
|
-
# <div class="admonition note">
|
|
1615
|
-
# <p class="admonition-title">Note</p>
|
|
1616
|
-
# <p>...</p>
|
|
1617
|
-
# </div>
|
|
1618
|
-
#
|
|
1619
|
-
# --- OR ---
|
|
1620
|
-
#
|
|
1621
|
-
# <div class="admonition note">
|
|
1367
|
+
match child.tag:
|
|
1622
1368
|
# <p>...</p>
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
#
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1369
|
+
case "p":
|
|
1370
|
+
# <p><img src="..." /></p>
|
|
1371
|
+
if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
|
|
1372
|
+
return self._transform_image(FormattingContext.BLOCK, child[0])
|
|
1373
|
+
|
|
1374
|
+
# <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
|
|
1375
|
+
elif is_placeholder_for(child, "TOC"):
|
|
1376
|
+
return self._transform_toc(child)
|
|
1377
|
+
|
|
1378
|
+
# <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
|
|
1379
|
+
elif is_placeholder_for(child, "LISTING"):
|
|
1380
|
+
return self._transform_listing(child)
|
|
1381
|
+
|
|
1382
|
+
# <div>...</div>
|
|
1383
|
+
case "div":
|
|
1384
|
+
classes = child.get("class", "").split(" ")
|
|
1385
|
+
|
|
1386
|
+
# <div class="arithmatex">...</div>
|
|
1387
|
+
if "arithmatex" in classes:
|
|
1388
|
+
return self._transform_block_math(child)
|
|
1389
|
+
|
|
1390
|
+
# <div><ac:structured-macro ...>...</ac:structured-macro></div>
|
|
1391
|
+
elif "csf" in classes:
|
|
1392
|
+
if len(child) != 1:
|
|
1393
|
+
raise DocumentError("expected: single child in Confluence Storage Format block")
|
|
1394
|
+
|
|
1395
|
+
return child[0]
|
|
1396
|
+
|
|
1397
|
+
# <div class="footnote">
|
|
1398
|
+
# <hr/>
|
|
1399
|
+
# <ol>
|
|
1400
|
+
# <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
|
|
1401
|
+
# </ol>
|
|
1402
|
+
# </div>
|
|
1403
|
+
elif "footnote" in classes:
|
|
1404
|
+
self._transform_footnote_def(child)
|
|
1405
|
+
return None
|
|
1406
|
+
|
|
1407
|
+
# <div class="admonition note">
|
|
1408
|
+
# <p class="admonition-title">Note</p>
|
|
1409
|
+
# <p>...</p>
|
|
1410
|
+
# </div>
|
|
1411
|
+
#
|
|
1412
|
+
# --- OR ---
|
|
1413
|
+
#
|
|
1414
|
+
# <div class="admonition note">
|
|
1415
|
+
# <p>...</p>
|
|
1416
|
+
# </div>
|
|
1417
|
+
elif "admonition" in classes:
|
|
1418
|
+
return self._transform_admonition(child)
|
|
1419
|
+
|
|
1420
|
+
# <blockquote>...</blockquote>
|
|
1421
|
+
case "blockquote":
|
|
1422
|
+
# Alerts in GitHub
|
|
1423
|
+
# <blockquote>
|
|
1424
|
+
# <p>[!TIP] ...</p>
|
|
1425
|
+
# </blockquote>
|
|
1426
|
+
if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
|
|
1427
|
+
return self._transform_github_alert(child)
|
|
1428
|
+
|
|
1429
|
+
# Alerts in GitLab
|
|
1430
|
+
# <blockquote>
|
|
1431
|
+
# <p>DISCLAIMER: ...</p>
|
|
1432
|
+
# </blockquote>
|
|
1433
|
+
elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
|
|
1434
|
+
return self._transform_gitlab_alert(child)
|
|
1435
|
+
|
|
1436
|
+
# <details markdown="1">
|
|
1437
|
+
# <summary>...</summary>
|
|
1438
|
+
# ...
|
|
1439
|
+
# </details>
|
|
1440
|
+
case "details" if len(child) > 1 and child[0].tag == "summary":
|
|
1441
|
+
return self._transform_collapsed(child)
|
|
1655
1442
|
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
if len(child) > 0 and all(element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]) for item in child):
|
|
1662
|
-
return self._transform_tasklist(child)
|
|
1443
|
+
# <ol>...</ol>
|
|
1444
|
+
case "ol":
|
|
1445
|
+
# Confluence adds the attribute `start` for every ordered list
|
|
1446
|
+
child.set("start", "1")
|
|
1447
|
+
return None
|
|
1663
1448
|
|
|
1664
|
-
|
|
1449
|
+
# <ul>
|
|
1450
|
+
# <li>[ ] ...</li>
|
|
1451
|
+
# <li>[x] ...</li>
|
|
1452
|
+
# </ul>
|
|
1453
|
+
case "ul":
|
|
1454
|
+
if len(child) > 0 and all(element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]) for item in child):
|
|
1455
|
+
return self._transform_tasklist(child)
|
|
1665
1456
|
|
|
1666
|
-
|
|
1667
|
-
normalize_inline(child)
|
|
1668
|
-
return None
|
|
1457
|
+
return None
|
|
1669
1458
|
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1459
|
+
case "li":
|
|
1460
|
+
normalize_inline(child)
|
|
1461
|
+
return None
|
|
1673
1462
|
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1463
|
+
# <pre><code class="language-java"> ... </code></pre>
|
|
1464
|
+
case "pre" if len(child) == 1 and child[0].tag == "code":
|
|
1465
|
+
return self._transform_code_block(child[0])
|
|
1466
|
+
|
|
1467
|
+
# <table>...</table>
|
|
1468
|
+
case "table":
|
|
1469
|
+
for td in child.iterdescendants("td", "th"):
|
|
1470
|
+
normalize_inline(td)
|
|
1471
|
+
match self.options.layout.alignment:
|
|
1472
|
+
case "left":
|
|
1473
|
+
layout = "align-start"
|
|
1474
|
+
case _:
|
|
1475
|
+
layout = "default"
|
|
1476
|
+
child.set("data-layout", layout)
|
|
1477
|
+
if self.options.layout.table.display_mode == "fixed":
|
|
1478
|
+
child.set("data-table-display-mode", "fixed")
|
|
1479
|
+
if self.options.layout.table.width:
|
|
1480
|
+
child.set("data-table-width", str(self.options.layout.table.width))
|
|
1680
1481
|
|
|
1681
|
-
|
|
1682
|
-
elif child.tag == "img":
|
|
1683
|
-
return self._transform_image(FormattingContext.INLINE, child)
|
|
1482
|
+
return None
|
|
1684
1483
|
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1484
|
+
# <img src="..." alt="..." />
|
|
1485
|
+
case "img":
|
|
1486
|
+
return self._transform_image(FormattingContext.INLINE, child)
|
|
1688
1487
|
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1488
|
+
# <a href="..."> ... </a>
|
|
1489
|
+
case "a":
|
|
1490
|
+
return self._transform_link(child)
|
|
1692
1491
|
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1492
|
+
# <mark>...</mark>
|
|
1493
|
+
case "mark":
|
|
1494
|
+
return self._transform_mark(child)
|
|
1696
1495
|
|
|
1697
|
-
# <span
|
|
1698
|
-
|
|
1699
|
-
|
|
1496
|
+
# <span>...</span>
|
|
1497
|
+
case "span":
|
|
1498
|
+
classes = child.get("class", "").split(" ")
|
|
1700
1499
|
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
return None
|
|
1500
|
+
# <span class="arithmatex">...</span>
|
|
1501
|
+
if "arithmatex" in classes:
|
|
1502
|
+
return self._transform_inline_math(child)
|
|
1705
1503
|
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1504
|
+
# <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
|
|
1505
|
+
# Multiple references: <sup id="fnref2:NAME">...</sup>, <sup id="fnref3:NAME">...</sup>
|
|
1506
|
+
case "sup" if re.match(r"^fnref\d*:", child.get("id", "")):
|
|
1507
|
+
self._transform_footnote_ref(child)
|
|
1508
|
+
return None
|
|
1709
1509
|
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
child.tag = "u"
|
|
1510
|
+
# <input type="date" value="1984-01-01" />
|
|
1511
|
+
case "input" if child.get("type", "") == "date":
|
|
1512
|
+
return HTML("time", {"datetime": child.get("value", "")})
|
|
1714
1513
|
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1514
|
+
# <ins>...</ins>
|
|
1515
|
+
case "ins":
|
|
1516
|
+
# Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
|
|
1517
|
+
child.tag = "u"
|
|
1718
1518
|
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
if m is not None:
|
|
1723
|
-
level = int(m.group(1))
|
|
1724
|
-
title = element_to_text(child)
|
|
1725
|
-
self.toc.add(level, title)
|
|
1519
|
+
# <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
|
|
1520
|
+
case "x-emoji":
|
|
1521
|
+
return self._transform_emoji(child)
|
|
1726
1522
|
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1523
|
+
# <h1>...</h1>
|
|
1524
|
+
# <h2>...</h2> ...
|
|
1525
|
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
|
|
1526
|
+
level = int(child.tag[1:])
|
|
1527
|
+
title = element_to_text(child)
|
|
1528
|
+
self.toc.add(level, title)
|
|
1529
|
+
|
|
1530
|
+
if self.options.heading_anchors:
|
|
1531
|
+
self._transform_heading(child)
|
|
1532
|
+
return None
|
|
1533
|
+
case _:
|
|
1534
|
+
pass
|
|
1730
1535
|
|
|
1731
1536
|
return None
|
|
1732
1537
|
|
|
@@ -1750,14 +1555,14 @@ class ConfluenceDocument:
|
|
|
1750
1555
|
images: list[ImageData]
|
|
1751
1556
|
embedded_files: dict[str, EmbeddedFileData]
|
|
1752
1557
|
|
|
1753
|
-
options:
|
|
1558
|
+
options: DocumentOptions
|
|
1754
1559
|
root: ElementType
|
|
1755
1560
|
|
|
1756
1561
|
@classmethod
|
|
1757
1562
|
def create(
|
|
1758
1563
|
cls,
|
|
1759
1564
|
path: Path,
|
|
1760
|
-
options:
|
|
1565
|
+
options: DocumentOptions,
|
|
1761
1566
|
root_dir: Path,
|
|
1762
1567
|
site_metadata: ConfluenceSiteMetadata,
|
|
1763
1568
|
page_metadata: ConfluencePageCollection,
|
|
@@ -1765,9 +1570,10 @@ class ConfluenceDocument:
|
|
|
1765
1570
|
path = path.resolve(True)
|
|
1766
1571
|
|
|
1767
1572
|
document = Scanner().read(path)
|
|
1573
|
+
props = document.properties
|
|
1768
1574
|
|
|
1769
|
-
if
|
|
1770
|
-
page_id = ConfluencePageID(
|
|
1575
|
+
if props.page_id is not None:
|
|
1576
|
+
page_id = ConfluencePageID(props.page_id)
|
|
1771
1577
|
else:
|
|
1772
1578
|
# look up Confluence page ID in metadata
|
|
1773
1579
|
metadata = page_metadata.get(path)
|
|
@@ -1782,13 +1588,14 @@ class ConfluenceDocument:
|
|
|
1782
1588
|
self,
|
|
1783
1589
|
path: Path,
|
|
1784
1590
|
document: ScannedDocument,
|
|
1785
|
-
options:
|
|
1591
|
+
options: DocumentOptions,
|
|
1786
1592
|
root_dir: Path,
|
|
1787
1593
|
site_metadata: ConfluenceSiteMetadata,
|
|
1788
1594
|
page_metadata: ConfluencePageCollection,
|
|
1789
1595
|
) -> None:
|
|
1790
1596
|
"Converts a single Markdown document to Confluence Storage Format."
|
|
1791
1597
|
|
|
1598
|
+
props = document.properties
|
|
1792
1599
|
self.options = options
|
|
1793
1600
|
|
|
1794
1601
|
# register auxiliary URL substitutions
|
|
@@ -1802,11 +1609,12 @@ class ConfluenceDocument:
|
|
|
1802
1609
|
|
|
1803
1610
|
# modify HTML as necessary
|
|
1804
1611
|
if self.options.generated_by is not None:
|
|
1805
|
-
generated_by =
|
|
1612
|
+
generated_by = props.generated_by or self.options.generated_by
|
|
1806
1613
|
else:
|
|
1807
1614
|
generated_by = None
|
|
1808
1615
|
|
|
1809
1616
|
if generated_by is not None:
|
|
1617
|
+
generated_by = apply_generated_by_template(generated_by, path.relative_to(root_dir))
|
|
1810
1618
|
generated_by_html = markdown_to_html(generated_by)
|
|
1811
1619
|
|
|
1812
1620
|
content = [
|
|
@@ -1825,11 +1633,9 @@ class ConfluenceDocument:
|
|
|
1825
1633
|
raise ConversionError(path) from ex
|
|
1826
1634
|
|
|
1827
1635
|
# configure HTML-to-Confluence converter
|
|
1828
|
-
converter_options =
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
if document.alignment is not None:
|
|
1832
|
-
converter_options.alignment = document.alignment
|
|
1636
|
+
converter_options = copy.deepcopy(self.options.converter)
|
|
1637
|
+
if props.layout is not None:
|
|
1638
|
+
converter_options.layout = coalesce(props.layout, converter_options.layout)
|
|
1833
1639
|
converter = ConfluenceStorageFormatConverter(converter_options, path, root_dir, site_metadata, page_metadata)
|
|
1834
1640
|
|
|
1835
1641
|
# execute HTML-to-Confluence converter
|
|
@@ -1840,45 +1646,66 @@ class ConfluenceDocument:
|
|
|
1840
1646
|
|
|
1841
1647
|
# extract information discovered by converter
|
|
1842
1648
|
self.links = converter.links
|
|
1843
|
-
self.images = converter.images
|
|
1844
|
-
self.embedded_files = converter.embedded_files
|
|
1649
|
+
self.images = converter.attachments.images
|
|
1650
|
+
self.embedded_files = converter.attachments.embedded_files
|
|
1845
1651
|
|
|
1846
1652
|
# assign global properties for document
|
|
1847
|
-
self.title =
|
|
1848
|
-
self.labels =
|
|
1849
|
-
self.properties =
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
Allowed characters:
|
|
1862
|
-
|
|
1863
|
-
* Alphanumeric characters: 0-9, a-z, A-Z
|
|
1864
|
-
* Special characters: hyphen (-), underscore (_), period (.)
|
|
1865
|
-
"""
|
|
1866
|
-
|
|
1867
|
-
if isinstance(ref, Path):
|
|
1868
|
-
path = ref
|
|
1869
|
-
else:
|
|
1870
|
-
path = Path(ref)
|
|
1653
|
+
self.title = props.title or converter.toc.get_title()
|
|
1654
|
+
self.labels = props.tags
|
|
1655
|
+
self.properties = props.properties
|
|
1656
|
+
|
|
1657
|
+
# Remove the first heading if:
|
|
1658
|
+
# 1. The option is enabled
|
|
1659
|
+
# 2. Title was NOT from front-matter (document.title is None)
|
|
1660
|
+
# 3. A title was successfully extracted from heading (self.title is not None)
|
|
1661
|
+
if converter_options.skip_title_heading and props.title is None and self.title is not None:
|
|
1662
|
+
self._remove_first_heading()
|
|
1663
|
+
|
|
1664
|
+
def _remove_first_heading(self) -> None:
|
|
1665
|
+
"""
|
|
1666
|
+
Removes the first heading element from the document root.
|
|
1871
1667
|
|
|
1872
|
-
|
|
1873
|
-
|
|
1668
|
+
This is used when the title was extracted from the first unique top-level heading
|
|
1669
|
+
and the user has requested to skip it from the body to avoid duplication.
|
|
1874
1670
|
|
|
1875
|
-
|
|
1671
|
+
Handles the case where a generated-by info panel may be present as the first child.
|
|
1672
|
+
"""
|
|
1876
1673
|
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1674
|
+
# Find the first heading element (h1-h6) in the root
|
|
1675
|
+
heading_pattern = re.compile(r"^h[1-6]$", re.IGNORECASE)
|
|
1676
|
+
|
|
1677
|
+
for idx, child in enumerate(self.root):
|
|
1678
|
+
if not isinstance(child.tag, str):
|
|
1679
|
+
continue
|
|
1680
|
+
|
|
1681
|
+
if heading_pattern.match(child.tag) is None:
|
|
1682
|
+
continue
|
|
1683
|
+
|
|
1684
|
+
# Preserve any text that comes after the heading (tail text)
|
|
1685
|
+
tail = child.tail
|
|
1686
|
+
|
|
1687
|
+
# Remove the heading
|
|
1688
|
+
self.root.remove(child)
|
|
1689
|
+
|
|
1690
|
+
# If there was tail text, attach it to the previous sibling's tail
|
|
1691
|
+
# or to the parent's text if this was the first child
|
|
1692
|
+
if tail:
|
|
1693
|
+
if idx > 0:
|
|
1694
|
+
# Append to previous sibling's tail
|
|
1695
|
+
prev_sibling = self.root[idx - 1]
|
|
1696
|
+
if prev_sibling.tail:
|
|
1697
|
+
prev_sibling.tail += tail
|
|
1698
|
+
else:
|
|
1699
|
+
prev_sibling.tail = tail
|
|
1700
|
+
else:
|
|
1701
|
+
# No previous sibling, append to parent's text
|
|
1702
|
+
if self.root.text:
|
|
1703
|
+
self.root.text += tail
|
|
1704
|
+
else:
|
|
1705
|
+
self.root.text = tail
|
|
1706
|
+
|
|
1707
|
+
# Only remove the FIRST heading, then stop
|
|
1708
|
+
break
|
|
1882
1709
|
|
|
1883
|
-
|
|
1884
|
-
|
|
1710
|
+
def xhtml(self) -> str:
|
|
1711
|
+
return elements_to_string(self.root)
|