markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/csf.py CHANGED
@@ -18,16 +18,16 @@ from lxml.builder import ElementMaker
18
18
  ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
19
19
 
20
20
  # XML namespaces typically associated with Confluence Storage Format documents
21
- _namespaces = {
21
+ _NAMESPACES = {
22
22
  "ac": "http://atlassian.com/content",
23
23
  "ri": "http://atlassian.com/resource/identifier",
24
24
  }
25
- for key, value in _namespaces.items():
25
+ for key, value in _NAMESPACES.items():
26
26
  ET.register_namespace(key, value)
27
27
 
28
28
  HTML = ElementMaker()
29
- AC_ELEM = ElementMaker(namespace=_namespaces["ac"])
30
- RI_ELEM = ElementMaker(namespace=_namespaces["ri"])
29
+ AC_ELEM = ElementMaker(namespace=_NAMESPACES["ac"])
30
+ RI_ELEM = ElementMaker(namespace=_NAMESPACES["ri"])
31
31
 
32
32
 
33
33
  class ParseError(RuntimeError):
@@ -39,11 +39,11 @@ def _qname(namespace_uri: str, name: str) -> str:
39
39
 
40
40
 
41
41
  def AC_ATTR(name: str) -> str:
42
- return _qname(_namespaces["ac"], name)
42
+ return _qname(_NAMESPACES["ac"], name)
43
43
 
44
44
 
45
45
  def RI_ATTR(name: str) -> str:
46
- return _qname(_namespaces["ri"], name)
46
+ return _qname(_NAMESPACES["ri"], name)
47
47
 
48
48
 
49
49
  @contextmanager
@@ -77,7 +77,7 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ElementType:
77
77
  load_dtd=True,
78
78
  )
79
79
 
80
- ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in _namespaces.items())
80
+ ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in _NAMESPACES.items())
81
81
 
82
82
  data = [
83
83
  '<?xml version="1.0"?>',
@@ -139,6 +139,9 @@ def content_to_string(content: str) -> str:
139
139
  return _content_to_string(dtd_path, content)
140
140
 
141
141
 
142
+ _ROOT_REGEXP = re.compile(r"^<root\s+[^>]*>(.*)</root>\s*$", re.DOTALL)
143
+
144
+
142
145
  def elements_to_string(root: ElementType) -> str:
143
146
  """
144
147
  Converts a Confluence Storage Format element tree into an XML string to push to Confluence REST API.
@@ -148,8 +151,7 @@ def elements_to_string(root: ElementType) -> str:
148
151
  """
149
152
 
150
153
  xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
151
- m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)
152
- if m:
154
+ if m := _ROOT_REGEXP.match(xml):
153
155
  return m.group(1)
154
156
  else:
155
157
  raise ValueError("expected: Confluence content")
@@ -219,3 +221,81 @@ def normalize_inline(elem: ElementType) -> None:
219
221
  # ignore empty elements
220
222
  if item.tag != "p" or len(item) > 0 or item.text:
221
223
  elem.append(item)
224
+
225
+
226
+ # elements in which whitespace is normalized
227
+ _NORMALIZED_ELEMENTS = [
228
+ "a",
229
+ "b",
230
+ "blockquote",
231
+ "code",
232
+ "del",
233
+ "details",
234
+ "div",
235
+ "em",
236
+ "h1",
237
+ "h2",
238
+ "h3",
239
+ "h4",
240
+ "h5",
241
+ "h6",
242
+ "i",
243
+ "li",
244
+ "p",
245
+ "span",
246
+ "strong",
247
+ "sub",
248
+ "summary",
249
+ "sup",
250
+ "td",
251
+ "th",
252
+ "u",
253
+ "{" + _NAMESPACES["ac"] + "}link-body",
254
+ "{" + _NAMESPACES["ac"] + "}rich-text-body",
255
+ "{" + _NAMESPACES["ac"] + "}task-body",
256
+ ]
257
+
258
+ # elements that are recursed into for whitespace normalization
259
+ _PASSTHROUGH_ELEMENTS = _NORMALIZED_ELEMENTS + [
260
+ "ol",
261
+ "table",
262
+ "tbody",
263
+ "tfoot",
264
+ "thead",
265
+ "tr",
266
+ "ul",
267
+ "{" + _NAMESPACES["ac"] + "}link",
268
+ "{" + _NAMESPACES["ac"] + "}structured-macro",
269
+ "{" + _NAMESPACES["ac"] + "}task",
270
+ "{" + _NAMESPACES["ac"] + "}task-list",
271
+ ]
272
+
273
+
274
+ def normalize_whitespace(elem: ElementType) -> None:
275
+ "Replaces linefeed with space in contexts where whitespace normalization is permitted."
276
+
277
+ if not elem.text and len(elem) < 1:
278
+ # empty element
279
+ return
280
+
281
+ if elem.tag not in _PASSTHROUGH_ELEMENTS:
282
+ # element whose descendants are to be skipped
283
+ return
284
+
285
+ if elem.tag in _NORMALIZED_ELEMENTS:
286
+ if elem.text:
287
+ elem.text = elem.text.replace("\n", " ")
288
+ for child in elem:
289
+ if child.tail:
290
+ child.tail = child.tail.replace("\n", " ")
291
+ for child in elem:
292
+ normalize_whitespace(child)
293
+
294
+
295
+ def canonicalize(content: str) -> str:
296
+ "Converts a Confluence Storage Format (CSF) document to the normalized format."
297
+
298
+ root = elements_from_string(content)
299
+ for child in root:
300
+ normalize_whitespace(child)
301
+ return elements_to_string(root)
md2conf/drawio/render.py CHANGED
@@ -47,8 +47,10 @@ def inflate(data: bytes) -> bytes:
47
47
  :returns: Uncompressed data.
48
48
  """
49
49
 
50
+ # spellchecker: disable
50
51
  # -zlib.MAX_WBITS indicates raw DEFLATE stream (no zlib/gzip headers)
51
52
  return zlib.decompress(data, -zlib.MAX_WBITS)
53
+ # spellchecker: enable
52
54
 
53
55
 
54
56
  def decompress_diagram(xml_data: bytes | str) -> ElementType:
md2conf/frontmatter.py CHANGED
@@ -8,7 +8,8 @@ Copyright 2022-2026, Levente Hunyadi
8
8
 
9
9
  import re
10
10
  import typing
11
- from typing import Any, TypeVar
11
+ from dataclasses import dataclass
12
+ from typing import TypeVar
12
13
 
13
14
  import yaml
14
15
 
@@ -43,19 +44,30 @@ def extract_value(pattern: str, text: str) -> tuple[str | None, str]:
43
44
  def extract_frontmatter_block(text: str) -> tuple[str | None, str]:
44
45
  "Extracts the front-matter from a Markdown document as a blob of unparsed text."
45
46
 
46
- return extract_value(r"(?ms)\A---$(.+?)^---$", text)
47
+ return extract_value(r"(?ms)\A---\n(.+?)^---\n", text)
47
48
 
48
49
 
49
- def extract_frontmatter_json(text: str) -> tuple[dict[str, JsonType] | None, str]:
50
+ @dataclass
51
+ class FrontMatterProperties:
52
+ data: dict[str, JsonType] | None
53
+ inner_line_count: int
54
+
55
+ @property
56
+ def outer_line_count(self) -> int:
57
+ return self.inner_line_count + 2 # account for the two enclosing `---` (triple dash) delimiter lines
58
+
59
+
60
+ def extract_frontmatter_json(text: str) -> tuple[FrontMatterProperties | None, str]:
50
61
  "Extracts the front-matter from a Markdown document as a dictionary."
51
62
 
52
63
  block, text = extract_frontmatter_block(text)
53
64
 
54
- properties: dict[str, Any] | None = None
65
+ properties: FrontMatterProperties | None = None
55
66
  if block is not None:
67
+ inner_line_count = block.count("\n")
56
68
  data = yaml.safe_load(block)
57
69
  if isinstance(data, dict):
58
- properties = typing.cast(dict[str, JsonType], data)
70
+ properties = FrontMatterProperties(typing.cast(dict[str, JsonType], data), inner_line_count)
59
71
 
60
72
  return properties, text
61
73
 
@@ -65,6 +77,6 @@ def extract_frontmatter_object(tp: type[D], text: str) -> tuple[D | None, str]:
65
77
 
66
78
  value_object: D | None = None
67
79
  if properties is not None:
68
- value_object = json_to_object(tp, properties)
80
+ value_object = json_to_object(tp, properties.data)
69
81
 
70
82
  return value_object, text
md2conf/image.py CHANGED
@@ -75,7 +75,9 @@ class ImageGenerator:
75
75
  image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
76
76
  return self.create_attached_image(image_name, attrs)
77
77
 
78
- def transform_attached_data(self, image_data: bytes, attrs: ImageAttributes, relative_path: Path | None = None) -> ElementType:
78
+ def transform_attached_data(
79
+ self, image_data: bytes, attrs: ImageAttributes, relative_path: Path | None = None, *, image_type: str = "embedded"
80
+ ) -> ElementType:
79
81
  "Emits Confluence Storage Format XHTML for an attached raster or vector image."
80
82
 
81
83
  # extract dimensions and update attributes based on format
@@ -100,15 +102,15 @@ class ImageGenerator:
100
102
  alignment=attrs.alignment,
101
103
  )
102
104
 
103
- # generate filename and add as attachment
105
+ # generate filename
104
106
  if relative_path is not None:
105
107
  image_filename = attachment_name(relative_path.with_suffix(f".{self.options.output_format}"))
106
- self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, attrs.alt))
107
108
  else:
108
109
  image_hash = hashlib.md5(image_data).hexdigest()
109
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.output_format}")
110
- self.attachments.add_embed(image_filename, EmbeddedFileData(image_data))
110
+ image_filename = attachment_name(f"{image_type}_{image_hash}.{self.options.output_format}")
111
111
 
112
+ # add as attachment
113
+ self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, attrs.alt))
112
114
  return self.create_attached_image(image_filename, attrs)
113
115
 
114
116
  def create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ElementType:
md2conf/latex.py CHANGED
@@ -52,6 +52,13 @@ else:
52
52
  # spellchecker:disable-next-line
53
53
  fig.text(x=0, y=0, s=f"${expression}$", fontsize=font_size) # pyright: ignore[reportUnknownMemberType]
54
54
 
55
+ metadata: dict[str, str | None] = {"Title": expression}
56
+ match format:
57
+ case "png":
58
+ metadata.update({"Software": None})
59
+ case "svg":
60
+ metadata.update({"Creator": None, "Date": None, "Format": None, "Type": None})
61
+
55
62
  # save the image
56
63
  fig.savefig( # pyright: ignore[reportUnknownMemberType]
57
64
  f,
@@ -59,7 +66,7 @@ else:
59
66
  format=format,
60
67
  bbox_inches="tight",
61
68
  pad_inches=0.0,
62
- metadata={"Title": expression} if format == "png" else None,
69
+ metadata=metadata,
63
70
  )
64
71
 
65
72
  # close the figure to free memory
md2conf/markdown.py CHANGED
@@ -6,6 +6,7 @@ Copyright 2022-2026, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
+ import re
9
10
  import xml.etree.ElementTree
10
11
  from typing import Any
11
12
 
@@ -86,7 +87,7 @@ _CONVERTER = markdown.Markdown(
86
87
  "sane_lists",
87
88
  ],
88
89
  extension_configs={
89
- "footnotes": {"BACKLINK_TITLE": ""},
90
+ "footnotes": {"BACKLINK_TITLE": ""}, # spellchecker:disable-line
90
91
  "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
91
92
  "pymdownx.emoji": {"emoji_generator": _emoji_generator},
92
93
  "pymdownx.highlight": {
@@ -114,3 +115,69 @@ def markdown_to_html(content: str) -> str:
114
115
  _CONVERTER.reset()
115
116
  html = _CONVERTER.convert(content)
116
117
  return html
118
+
119
+
120
+ # matches the start and end marker of a fenced code block
121
+ _FENCED_CODE_REGEXP = re.compile(r"^\s*(?:`{3,}|~{3,})", re.MULTILINE)
122
+
123
+ # matches a regular table row (but not the column alignment row)
124
+ _TABLE_ROW_REGEXP = re.compile(r"^\|\s*([^\s:-]+.*?)\s*\|$", re.MULTILINE)
125
+
126
+
127
+ def markdown_with_line_numbers(input_lines: list[str], start_line_number: int) -> list[str]:
128
+ """
129
+ Injects XHTML line number markers in Markdown text.
130
+
131
+ Unfortunately, Python-Markdown doesn't propagate line numbers to downstream processors, making it challenging to
132
+ display helpful error messages to end users. This function injects XHTML self-closing tags into the Markdown source:
133
+
134
+ ```
135
+ <line-number value="#" />
136
+ ```
137
+
138
+ When tree visitors process the XHTML content generated by Python-Markdown and an error is triggered, the exception
139
+ handler can use these placeholder elements to show end users which part of the Markdown file they should look at
140
+ by translating a tree node in the intermediate output into a line number in the source.
141
+
142
+ :param input_lines: Markdown source file split into lines.
143
+ :param start_line_number: Line number of the first line of the Markdown document after the front-matter, or 1 if there is no front-matter.
144
+ """
145
+
146
+ output_lines: list[str] = []
147
+
148
+ fence_marker: str | None = None
149
+ for number, line in enumerate(input_lines, start=start_line_number):
150
+ if not line:
151
+ output_lines.append("")
152
+ continue
153
+
154
+ # fenced code blocks
155
+ if fence_match := _FENCED_CODE_REGEXP.match(line):
156
+ marker = fence_match.group()
157
+ if fence_marker is None:
158
+ fence_marker = marker
159
+ elif marker == fence_marker:
160
+ fence_marker = None
161
+ elif fence_marker is None:
162
+ # not inside a fenced code block
163
+ if (
164
+ # not an admonition
165
+ not line.startswith("!!! ")
166
+ # not a Setext heading
167
+ and not (line.startswith("===") or line.startswith("---"))
168
+ # not a decorated ATX heading
169
+ and not line.endswith("#")
170
+ # not a math block formula
171
+ and not (line.startswith("$$") and line.endswith("$$"))
172
+ # not a Markdown table
173
+ and not (line.startswith("|") or line.endswith("|"))
174
+ # not a block-level HTML tag
175
+ and not (line.startswith("<") or line.endswith(">"))
176
+ ):
177
+ line = f'{line}<line-number value="{number}" />'
178
+ elif row_match := _TABLE_ROW_REGEXP.match(line):
179
+ line = f'| {row_match.group(1)}<line-number value="{number}" /> |'
180
+
181
+ output_lines.append(line)
182
+
183
+ return output_lines
md2conf/options.py CHANGED
@@ -6,10 +6,11 @@ Copyright 2022-2026, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
- import dataclasses
10
- from dataclasses import dataclass
9
+ from dataclasses import dataclass, field
11
10
  from typing import Literal
12
11
 
12
+ from .clio import boolean_option, composite_option, value_option
13
+
13
14
 
14
15
  @dataclass
15
16
  class ConfluencePageID:
@@ -25,8 +26,11 @@ class ImageLayoutOptions:
25
26
  :param max_width: Maximum display width for images [px]. Wider images are scaled down for page display. Original size kept for full-size viewing.
26
27
  """
27
28
 
28
- alignment: Literal["center", "left", "right"] | None = None
29
- max_width: int | None = None
29
+ alignment: Literal["center", "left", "right", None] = field(default=None, metadata=value_option("Alignment for block-level images and formulas."))
30
+ max_width: int | None = field(
31
+ default=None,
32
+ metadata=value_option("Maximum display width for images [px]. Wider images are scaled down for page display."),
33
+ )
30
34
 
31
35
 
32
36
  @dataclass
@@ -38,8 +42,8 @@ class TableLayoutOptions:
38
42
  :param display_mode: Whether to use fixed or responsive column widths.
39
43
  """
40
44
 
41
- width: int | None = None
42
- display_mode: Literal["fixed", "responsive"] | None = None
45
+ width: int | None = field(default=None, metadata=value_option("Maximum table width in pixels."))
46
+ display_mode: Literal["responsive", "fixed"] = field(default="responsive", metadata=value_option("Set table display mode."))
43
47
 
44
48
 
45
49
  @dataclass
@@ -54,9 +58,9 @@ class LayoutOptions:
54
58
  :param alignment: Default alignment (unless overridden with more specific setting).
55
59
  """
56
60
 
57
- image: ImageLayoutOptions = dataclasses.field(default_factory=ImageLayoutOptions)
58
- table: TableLayoutOptions = dataclasses.field(default_factory=TableLayoutOptions)
59
- alignment: Literal["center", "left", "right"] | None = None
61
+ image: ImageLayoutOptions = field(default_factory=ImageLayoutOptions, metadata=composite_option())
62
+ table: TableLayoutOptions = field(default_factory=TableLayoutOptions, metadata=composite_option())
63
+ alignment: Literal["center", "left", "right", None] = field(default=None, metadata=value_option("Default alignment for block-level content."))
60
64
 
61
65
  def get_image_alignment(self) -> Literal["center", "left", "right"]:
62
66
  return self.image.alignment or self.alignment or "center"
@@ -69,8 +73,8 @@ class ConverterOptions:
69
73
 
70
74
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
71
75
  conversion rules for the identifier.
72
- :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
73
- plain text; when false, raise an exception.
76
+ :param force_valid_url: If enabled, raise an exception when relative URLs point to an invalid location. If disabled,
77
+ ignore invalid URLs, emit a warning and replace the anchor with plain text.
74
78
  :param skip_title_heading: Whether to remove the first heading from document body when used as page title.
75
79
  :param prefer_raster: Whether to choose PNG files over SVG files when available.
76
80
  :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
@@ -83,18 +87,81 @@ class ConverterOptions:
83
87
  :param layout: Layout options for content on a Confluence page.
84
88
  """
85
89
 
86
- heading_anchors: bool = False
87
- ignore_invalid_url: bool = False
88
- skip_title_heading: bool = False
89
- prefer_raster: bool = True
90
- render_drawio: bool = False
91
- render_mermaid: bool = False
92
- render_plantuml: bool = False
93
- render_latex: bool = False
94
- diagram_output_format: Literal["png", "svg"] = "png"
95
- webui_links: bool = False
96
- use_panel: bool = False
97
- layout: LayoutOptions = dataclasses.field(default_factory=LayoutOptions)
90
+ heading_anchors: bool = field(
91
+ default=False,
92
+ metadata=boolean_option(
93
+ "Place an anchor at each section heading with GitHub-style same-page identifiers.",
94
+ "Omit the extra anchor from section headings. (May break manually placed same-page references.)",
95
+ ),
96
+ )
97
+ force_valid_url: bool = field(
98
+ default=True,
99
+ metadata=boolean_option(
100
+ "Raise an error when relative URLs point to an invalid location.",
101
+ "Emit a warning but otherwise ignore relative URLs that point to an invalid location.",
102
+ ),
103
+ )
104
+ skip_title_heading: bool = field(
105
+ default=False,
106
+ metadata=boolean_option(
107
+ "Remove the first heading from document body when it is used as the page title (does not apply if title comes from front-matter).",
108
+ "Keep the first heading in document body even when used as page title.",
109
+ ),
110
+ )
111
+ prefer_raster: bool = field(
112
+ default=True,
113
+ metadata=boolean_option(
114
+ "Prefer PNG over SVG when both exist.",
115
+ "Use SVG files directly instead of preferring PNG equivalents.",
116
+ ),
117
+ )
118
+ render_drawio: bool = field(
119
+ default=True,
120
+ metadata=boolean_option(
121
+ "Render draw.io diagrams as image files. (Installed utility required to covert.)",
122
+ "Upload draw.io diagram sources as Confluence page attachments. (Marketplace app required to display.)",
123
+ ),
124
+ )
125
+ render_mermaid: bool = field(
126
+ default=True,
127
+ metadata=boolean_option(
128
+ "Render Mermaid diagrams as image files. (Installed utility required to convert.)",
129
+ "Upload Mermaid diagram sources as Confluence page attachments. (Marketplace app required to display.)",
130
+ ),
131
+ )
132
+ render_plantuml: bool = field(
133
+ default=True,
134
+ metadata=boolean_option(
135
+ "Render PlantUML diagrams as image files. (Installed utility required to convert.)",
136
+ "Upload PlantUML diagram sources as Confluence page attachments. (Marketplace app required to display.)",
137
+ ),
138
+ )
139
+ render_latex: bool = field(
140
+ default=True,
141
+ metadata=boolean_option(
142
+ "Render LaTeX formulas as image files. (Matplotlib required to convert.)",
143
+ "Inline LaTeX formulas in Confluence page. (Marketplace app required to display.)",
144
+ ),
145
+ )
146
+ diagram_output_format: Literal["png", "svg"] = field(
147
+ default="png",
148
+ metadata=value_option("Format for rendering Mermaid and draw.io diagrams."),
149
+ )
150
+ webui_links: bool = field(
151
+ default=False,
152
+ metadata=boolean_option(
153
+ "Enable Confluence Web UI links. (Typically required for on-prem versions of Confluence.)",
154
+ "Use hierarchical links including space and page ID.",
155
+ ),
156
+ )
157
+ use_panel: bool = field(
158
+ default=False,
159
+ metadata=boolean_option(
160
+ "Transform admonitions and alerts into a Confluence custom panel.",
161
+ "Use standard Confluence macro types for admonitions and alerts (info, tip, note and warning).",
162
+ ),
163
+ )
164
+ layout: LayoutOptions = field(default_factory=LayoutOptions, metadata=composite_option())
98
165
 
99
166
 
100
167
  @dataclass
@@ -108,6 +175,7 @@ class DocumentOptions:
108
175
  :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
109
176
  :param skip_update: Whether to skip saving Confluence page ID in Markdown files.
110
177
  :param converter: Options for converting an HTML tree into Confluence Storage Format.
178
+ :param line_numbers: Inject line numbers in Markdown source to help localize conversion errors.
111
179
  """
112
180
 
113
181
  root_page_id: ConfluencePageID | None = None
@@ -115,4 +183,5 @@ class DocumentOptions:
115
183
  title_prefix: str | None = None
116
184
  generated_by: str | None = "This page has been generated with a tool."
117
185
  skip_update: bool = False
118
- converter: ConverterOptions = dataclasses.field(default_factory=ConverterOptions)
186
+ converter: ConverterOptions = field(default_factory=ConverterOptions)
187
+ line_numbers: bool = False
@@ -148,7 +148,7 @@ class PlantUMLExtension(MarketplaceExtension):
148
148
  return AC_ELEM(
149
149
  "structured-macro",
150
150
  {
151
- AC_ATTR("name"): "plantumlcloud",
151
+ AC_ATTR("name"): "plantumlcloud", # spellchecker:disable-line
152
152
  AC_ATTR("schema-version"): "1",
153
153
  "data-layout": "default",
154
154
  AC_ATTR("local-id"): local_id,