markdown-to-confluence 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -25,7 +25,9 @@ import markdown
25
25
  import yaml
26
26
  from lxml.builder import ElementMaker
27
27
 
28
- from . import mermaid
28
+ from .mermaid import render_diagram
29
+ from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
30
+ from .properties import PageError
29
31
 
30
32
  namespaces = {
31
33
  "ac": "http://atlassian.com/content",
@@ -91,9 +93,11 @@ def markdown_to_html(content: str) -> str:
91
93
  extensions=[
92
94
  "admonition",
93
95
  "markdown.extensions.tables",
94
- "markdown.extensions.fenced_code",
96
+ # "markdown.extensions.fenced_code",
95
97
  "pymdownx.emoji",
98
+ "pymdownx.highlight", # required by `pymdownx.superfences`
96
99
  "pymdownx.magiclink",
100
+ "pymdownx.superfences",
97
101
  "pymdownx.tilde",
98
102
  "sane_lists",
99
103
  "md_in_html",
@@ -101,7 +105,10 @@ def markdown_to_html(content: str) -> str:
101
105
  extension_configs={
102
106
  "pymdownx.emoji": {
103
107
  "emoji_generator": emoji_generator,
104
- }
108
+ },
109
+ "pymdownx.highlight": {
110
+ "use_pygments": False,
111
+ },
105
112
  },
106
113
  )
107
114
 
@@ -136,8 +143,8 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
136
143
 
137
144
  try:
138
145
  return ET.fromstringlist(data, parser=parser)
139
- except ET.XMLSyntaxError as e:
140
- raise ParseError(e)
146
+ except ET.XMLSyntaxError as ex:
147
+ raise ParseError() from ex
141
148
 
142
149
 
143
150
  def elements_from_strings(items: list[str]) -> ET._Element:
@@ -234,15 +241,6 @@ _languages = [
234
241
  ]
235
242
 
236
243
 
237
- @dataclass
238
- class ConfluencePageMetadata:
239
- domain: str
240
- base_path: str
241
- page_id: str
242
- space_key: Optional[str]
243
- title: str
244
-
245
-
246
244
  class NodeVisitor:
247
245
  def visit(self, node: ET._Element) -> None:
248
246
  "Recursively visits all descendants of this node."
@@ -271,6 +269,53 @@ def title_to_identifier(title: str) -> str:
271
269
  return s
272
270
 
273
271
 
272
def element_to_text(node: ET._Element) -> str:
    """
    Collects the text of an element and all of its descendants into a single string.

    :param node: Element whose text content is gathered.
    :returns: The concatenated text fragments, with leading and trailing whitespace removed.
    """

    fragments = list(node.itertext())
    combined = "".join(fragments)
    return combined.strip()
276
+
277
+
278
@dataclass
class TableOfContentsEntry:
    """
    A single heading recorded for a table of contents.

    :param level: Heading level as an integer.
    :param text: Heading text.
    """

    level: int
    text: str
282
+
283
+
284
class TableOfContents:
    """
    Accumulates Markdown headings and proposes a document title from them.
    """

    headings: list[TableOfContentsEntry]

    def __init__(self) -> None:
        self.headings = list()

    def add(self, level: int, text: str) -> None:
        """
        Records a heading in the table of contents.

        :param level: Markdown heading level (e.g. `1` for first-level heading).
        :param text: Markdown heading text.
        """

        entry = TableOfContentsEntry(level, text)
        self.headings.append(entry)

    def get_title(self) -> Optional[str]:
        """
        Proposes a document title based on the recorded headings.

        Heading levels are examined in order from 1 to 6; the first level that has
        exactly one heading supplies the title. Levels with no heading or with
        several headings are passed over.

        :returns: Title text, or `None` if no level has exactly one heading.
        """

        for level in range(1, 7):
            candidates = [entry.text for entry in self.headings if entry.level == level]
            if len(candidates) == 1:
                return candidates[0]

        return None
317
+
318
+
274
319
  @dataclass
275
320
  class ConfluenceConverterOptions:
276
321
  """
@@ -299,9 +344,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
299
344
  path: Path
300
345
  base_dir: Path
301
346
  root_dir: Path
347
+ toc: TableOfContents
302
348
  links: list[str]
303
349
  images: list[Path]
304
350
  embedded_images: dict[str, bytes]
351
+ site_metadata: ConfluenceSiteMetadata
305
352
  page_metadata: dict[Path, ConfluencePageMetadata]
306
353
 
307
354
  def __init__(
@@ -309,6 +356,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
309
356
  options: ConfluenceConverterOptions,
310
357
  path: Path,
311
358
  root_dir: Path,
359
+ site_metadata: ConfluenceSiteMetadata,
312
360
  page_metadata: dict[Path, ConfluencePageMetadata],
313
361
  ) -> None:
314
362
  super().__init__()
@@ -316,14 +364,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
316
364
  self.path = path
317
365
  self.base_dir = path.parent
318
366
  self.root_dir = root_dir
367
+ self.toc = TableOfContents()
319
368
  self.links = []
320
369
  self.images = []
321
370
  self.embedded_images = {}
371
+ self.site_metadata = site_metadata
322
372
  self.page_metadata = page_metadata
323
373
 
324
374
  def _transform_heading(self, heading: ET._Element) -> None:
325
- title = "".join(heading.itertext()).strip()
326
-
327
375
  for e in heading:
328
376
  self.visit(e)
329
377
 
@@ -336,7 +384,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
336
384
  AC(
337
385
  "parameter",
338
386
  {ET.QName(namespaces["ac"], "name"): ""},
339
- title_to_identifier(title),
387
+ title_to_identifier(element_to_text(heading)),
340
388
  ),
341
389
  )
342
390
 
@@ -409,13 +457,20 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
409
457
  self.links.append(url)
410
458
 
411
459
  if self.options.webui_links:
412
- page_url = f"{link_metadata.base_path}pages/viewpage.action?pageId={link_metadata.page_id}"
460
+ page_url = f"{self.site_metadata.base_path}pages/viewpage.action?pageId={link_metadata.page_id}"
413
461
  else:
414
- page_url = f"{link_metadata.base_path}spaces/{link_metadata.space_key}/pages/{link_metadata.page_id}/{link_metadata.title}"
462
+ space_key = link_metadata.space_key or self.site_metadata.space_key
463
+
464
+ if space_key is None:
465
+ raise DocumentError(
466
+ "Confluence space key required for building full web URLs"
467
+ )
468
+
469
+ page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{link_metadata.title}"
415
470
 
416
471
  components = ParseResult(
417
472
  scheme="https",
418
- netloc=link_metadata.domain,
473
+ netloc=self.site_metadata.domain,
419
474
  path=page_url,
420
475
  params="",
421
476
  query="",
@@ -527,11 +582,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
527
582
  {ET.QName(namespaces["ac"], "name"): "language"},
528
583
  language,
529
584
  ),
530
- AC(
531
- "parameter",
532
- {ET.QName(namespaces["ac"], "name"): "linenumbers"},
533
- "true",
534
- ),
535
585
  AC("plain-text-body", ET.CDATA(content)),
536
586
  )
537
587
 
@@ -539,7 +589,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
539
589
  "Transforms a Mermaid diagram code block."
540
590
 
541
591
  if self.options.render_mermaid:
542
- image_data = mermaid.render(content, self.options.diagram_output_format)
592
+ image_data = render_diagram(content, self.options.diagram_output_format)
543
593
  image_hash = hashlib.md5(image_data).hexdigest()
544
594
  image_filename = attachment_name(
545
595
  f"embedded_{image_hash}.{self.options.diagram_output_format}"
@@ -799,10 +849,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
799
849
  if not isinstance(child.tag, str):
800
850
  return None
801
851
 
802
- if self.options.heading_anchors:
803
- # <h1>...</h1>
804
- # <h2>...</h2> ...
805
- if re.match(r"^h[1-6]$", child.tag, flags=re.IGNORECASE) is not None:
852
+ # <h1>...</h1>
853
+ # <h2>...</h2> ...
854
+ m = re.match(r"^h([1-6])$", child.tag, flags=re.IGNORECASE)
855
+ if m is not None:
856
+ level = int(m.group(1))
857
+ title = element_to_text(child)
858
+ self.toc.add(level, title)
859
+
860
+ if self.options.heading_anchors:
806
861
  self._transform_heading(child)
807
862
  return None
808
863
 
@@ -891,7 +946,7 @@ class ConfluenceStorageFormatCleaner(NodeVisitor):
891
946
 
892
947
 
893
948
  class DocumentError(RuntimeError):
894
- pass
949
+ "Raised when a converted Markdown document has an unexpected element or attribute."
895
950
 
896
951
 
897
952
  def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
@@ -906,6 +961,14 @@ def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
906
961
  return value, text
907
962
 
908
963
 
964
@dataclass
class ConfluencePageID:
    """
    Wraps the identifier of a Confluence page.

    The initializer is the one generated by `@dataclass`; it accepts `page_id`
    positionally or by keyword.

    :param page_id: Confluence page ID.
    """

    page_id: str
970
+
971
+
909
972
  @dataclass
910
973
  class ConfluenceQualifiedID:
911
974
  page_id: str
@@ -980,13 +1043,17 @@ class ConfluenceDocumentOptions:
980
1043
  ignore_invalid_url: bool = False
981
1044
  heading_anchors: bool = False
982
1045
  generated_by: Optional[str] = "This page has been generated with a tool."
983
- root_page_id: Optional[str] = None
1046
+ root_page_id: Optional[ConfluencePageID] = None
984
1047
  keep_hierarchy: bool = False
985
1048
  render_mermaid: bool = False
986
1049
  diagram_output_format: Literal["png", "svg"] = "png"
987
1050
  webui_links: bool = False
988
1051
 
989
1052
 
1053
class ConversionError(RuntimeError):
    """
    Raised when a Markdown document cannot be converted to Confluence Storage Format.
    """
1055
+
1056
+
990
1057
  class ConfluenceDocument:
991
1058
  id: ConfluenceQualifiedID
992
1059
  title: Optional[str]
@@ -996,14 +1063,15 @@ class ConfluenceDocument:
996
1063
  options: ConfluenceDocumentOptions
997
1064
  root: ET._Element
998
1065
 
999
- def __init__(
1000
- self,
1066
+ @classmethod
1067
+ def create(
1068
+ cls,
1001
1069
  path: Path,
1002
1070
  options: ConfluenceDocumentOptions,
1003
1071
  root_dir: Path,
1072
+ site_metadata: ConfluenceSiteMetadata,
1004
1073
  page_metadata: dict[Path, ConfluencePageMetadata],
1005
- ) -> None:
1006
- self.options = options
1074
+ ) -> "ConfluenceDocument":
1007
1075
  path = path.resolve(True)
1008
1076
 
1009
1077
  with open(path, "r", encoding="utf-8") as f:
@@ -1019,35 +1087,61 @@ class ConfluenceDocument:
1019
1087
  metadata.page_id, metadata.space_key
1020
1088
  )
1021
1089
  if qualified_id is None:
1022
- raise ValueError("missing Confluence page ID")
1090
+ raise PageError("missing Confluence page ID")
1091
+
1092
+ return ConfluenceDocument(
1093
+ path, text, qualified_id, options, root_dir, site_metadata, page_metadata
1094
+ )
1095
+
1096
+ def __init__(
1097
+ self,
1098
+ path: Path,
1099
+ text: str,
1100
+ qualified_id: ConfluenceQualifiedID,
1101
+ options: ConfluenceDocumentOptions,
1102
+ root_dir: Path,
1103
+ site_metadata: ConfluenceSiteMetadata,
1104
+ page_metadata: dict[Path, ConfluencePageMetadata],
1105
+ ) -> None:
1106
+ self.options = options
1023
1107
  self.id = qualified_id
1024
1108
 
1109
+ # extract frontmatter
1110
+ self.title, text = extract_frontmatter_title(text)
1111
+
1025
1112
  # extract 'generated-by' tag text
1026
1113
  generated_by_tag, text = extract_value(
1027
1114
  r"<!--\s+generated-by:\s*(.*)\s+-->", text
1028
1115
  )
1029
1116
 
1030
- # extract frontmatter
1031
- self.title, text = extract_frontmatter_title(text)
1032
-
1033
1117
  # convert to HTML
1034
1118
  html = markdown_to_html(text)
1035
1119
 
1036
1120
  # parse Markdown document
1037
1121
  if self.options.generated_by is not None:
1038
- generated_by = self.options.generated_by
1039
1122
  if generated_by_tag is not None:
1040
- generated_by = generated_by_tag
1123
+ generated_by_text = generated_by_tag
1124
+ else:
1125
+ generated_by_text = self.options.generated_by
1126
+ else:
1127
+ generated_by_text = None
1128
+
1129
+ if generated_by_text is not None:
1130
+ generated_by_html = markdown_to_html(generated_by_text)
1041
1131
 
1042
1132
  content = [
1043
1133
  '<ac:structured-macro ac:name="info" ac:schema-version="1">',
1044
- f"<ac:rich-text-body><p>{generated_by}</p></ac:rich-text-body>",
1134
+ f"<ac:rich-text-body>{generated_by_html}</ac:rich-text-body>",
1045
1135
  "</ac:structured-macro>",
1046
1136
  html,
1047
1137
  ]
1048
1138
  else:
1049
1139
  content = [html]
1050
- self.root = elements_from_strings(content)
1140
+
1141
+ try:
1142
+ self.root = elements_from_strings(content)
1143
+ except ParseError as ex:
1144
+ raise ConversionError(path) from ex
1051
1145
 
1052
1146
  converter = ConfluenceStorageFormatConverter(
1053
1147
  ConfluenceConverterOptions(
@@ -1059,6 +1153,7 @@ class ConfluenceDocument:
1059
1153
  ),
1060
1154
  path,
1061
1155
  root_dir,
1156
+ site_metadata,
1062
1157
  page_metadata,
1063
1158
  )
1064
1159
  converter.visit(self.root)
@@ -1066,6 +1161,9 @@ class ConfluenceDocument:
1066
1161
  self.images = converter.images
1067
1162
  self.embedded_images = converter.embedded_images
1068
1163
 
1164
+ if self.title is None:
1165
+ self.title = converter.toc.get_title()
1166
+
1069
1167
  def xhtml(self) -> str:
1070
1168
  return elements_to_string(self.root)
1071
1169
 
md2conf/emoji.py CHANGED
@@ -10,6 +10,8 @@ import pathlib
10
10
 
11
11
  import pymdownx.emoji1_db as emoji_db
12
12
 
13
+ EMOJI_PAGE_ID = "86918529216"
14
+
13
15
 
14
16
  def generate_source(path: pathlib.Path) -> None:
15
17
  "Generates a source Markdown document for testing emojis."
@@ -17,7 +19,7 @@ def generate_source(path: pathlib.Path) -> None:
17
19
  emojis = emoji_db.emoji
18
20
 
19
21
  with open(path, "w") as f:
20
- print("<!-- confluence-page-id: 86918529216 -->", file=f)
22
+ print(f"<!-- confluence-page-id: {EMOJI_PAGE_ID} -->", file=f)
21
23
  print("<!-- This file has been generated by a script. -->", file=f)
22
24
  print(file=f)
23
25
  print("## Emoji", file=f)
md2conf/local.py ADDED
@@ -0,0 +1,132 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import hashlib
10
+ import logging
11
+ import os
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ from .converter import (
16
+ ConfluenceDocument,
17
+ ConfluenceDocumentOptions,
18
+ ConfluencePageID,
19
+ ConfluenceQualifiedID,
20
+ extract_qualified_id,
21
+ )
22
+ from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
23
+ from .processor import Converter, Processor, ProcessorFactory
24
+ from .properties import PageError
25
+
26
+ LOGGER = logging.getLogger(__name__)
27
+
28
+
29
class LocalProcessor(Processor):
    """
    Transforms a single Markdown page or a directory of Markdown pages into Confluence Storage Format (CSF) documents.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        *,
        out_dir: Optional[Path],
        root_dir: Path,
    ) -> None:
        """
        Initializes a new processor instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        :param out_dir: File system directory to write generated CSF documents to; `root_dir` is used when not set.
        :param root_dir: File system directory that acts as topmost root node.
        """

        super().__init__(options, site, root_dir)
        self.out_dir = out_dir or root_dir

    def _get_or_create_page(
        self,
        absolute_path: Path,
        parent_id: Optional[ConfluencePageID],
        *,
        title: Optional[str] = None,
    ) -> ConfluencePageMetadata:
        """
        Extracts metadata from a Markdown file.

        When the file carries no Confluence page ID, an identifier is derived from the MD5 digest of the file text.

        :param absolute_path: Path of the Markdown file to read.
        :param parent_id: Parent page ID; required when the file has no linked Confluence page.
        :param title: Not used by this implementation.
        :returns: Page metadata with an empty title and `overwrite` enabled.
        :raises PageError: If the file has no linked Confluence page and no parent page ID is given.
        """

        # parse file
        with open(absolute_path, "r", encoding="utf-8") as f:
            text = f.read()

        qualified_id, text = extract_qualified_id(text)

        if qualified_id is None:
            if parent_id is None:
                raise PageError(
                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
                )

            # `hexdigest()` zero-pads each byte to two hex digits, which gives a fixed-length identifier;
            # formatting bytes with a bare `x` drops leading zeros, so different digests could yield the same string
            digest = hashlib.md5(text.encode("utf-8")).hexdigest()
            LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
            qualified_id = ConfluenceQualifiedID(digest)

        return ConfluencePageMetadata(
            page_id=qualified_id.page_id,
            space_key=qualified_id.space_key,
            title="",
            overwrite=True,
        )

    def _save_document(self, document: ConfluenceDocument, path: Path) -> None:
        """
        Saves a new version of a Confluence document.

        Writes the document as a `.csf` file under the output directory, at the same location relative
        to the root directory as the source file, creating parent directories as needed.

        :param document: Converted document whose XHTML content is written.
        :param path: Path of the source file; must be located under the root directory.
        """

        content = document.xhtml()
        # NOTE(review): `self.root_dir` is presumably assigned by the base class initializer -- confirm
        out_path = self.out_dir / path.relative_to(self.root_dir).with_suffix(".csf")
        os.makedirs(out_path.parent, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(content)
101
+
102
+
103
class LocalProcessorFactory(ProcessorFactory):
    """
    Creates `LocalProcessor` instances that share the same options, site metadata and output directory.
    """

    out_dir: Optional[Path]

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        """
        Initializes a new factory instance.

        :param options: Options forwarded to the base class initializer.
        :param site: Site metadata forwarded to the base class initializer.
        :param out_dir: Output directory handed to each processor that is created.
        """

        super().__init__(options, site)
        self.out_dir = out_dir

    def create(self, root_dir: Path) -> Processor:
        """
        Creates a processor for a root directory.

        :param root_dir: Root directory handed to the new processor.
        :returns: A new `LocalProcessor` instance.
        """

        # NOTE(review): `self.options` and `self.site` are presumably assigned by the base class -- confirm
        processor = LocalProcessor(
            self.options,
            self.site,
            out_dir=self.out_dir,
            root_dir=root_dir,
        )
        return processor
119
+
120
+
121
class LocalConverter(Converter):
    """
    The entry point for Markdown to Confluence conversion.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        """
        Initializes a new converter backed by a `LocalProcessorFactory`.

        :param options: Options passed on to the processor factory.
        :param site: Site metadata passed on to the processor factory.
        :param out_dir: Output directory passed on to the processor factory.
        """

        factory = LocalProcessorFactory(options, site, out_dir)
        super().__init__(factory)
md2conf/mermaid.py CHANGED
@@ -47,7 +47,7 @@ def has_mmdc() -> bool:
47
47
  return shutil.which(executable) is not None
48
48
 
49
49
 
50
- def render(source: str, output_format: Literal["png", "svg"] = "png") -> bytes:
50
+ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") -> bytes:
51
51
  "Generates a PNG or SVG image from a Mermaid diagram source."
52
52
 
53
53
  filename = f"tmp_mermaid.{output_format}"
md2conf/metadata.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+
12
+
13
@dataclass
class ConfluenceSiteMetadata:
    """
    Settings that identify a Confluence wiki site.

    :param domain: Confluence organization domain (e.g. `levente-hunyadi.atlassian.net`).
    :param base_path: Base path for Confluence (e.g. `/wiki/`).
    :param space_key: Confluence space key for new pages (e.g. `~hunyadi` or `INST`), if any.
    """

    domain: str
    base_path: str
    space_key: Optional[str]
26
+
27
+
28
@dataclass
class ConfluencePageMetadata:
    """
    Properties that describe a single Confluence page.

    :param page_id: Confluence page ID.
    :param space_key: Confluence space key, if any.
    :param title: Document title.
    :param overwrite: True if operations are allowed to update document properties (e.g. title).
    """

    page_id: str
    space_key: Optional[str]
    title: str
    overwrite: bool