markdown-to-confluence 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/METADATA +275 -208
  2. markdown_to_confluence-0.5.5.dist-info/RECORD +57 -0
  3. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/WHEEL +1 -1
  4. md2conf/__init__.py +1 -1
  5. md2conf/__main__.py +61 -189
  6. md2conf/api.py +35 -69
  7. md2conf/attachment.py +4 -3
  8. md2conf/clio.py +226 -0
  9. md2conf/compatibility.py +5 -0
  10. md2conf/converter.py +239 -147
  11. md2conf/csf.py +89 -9
  12. md2conf/drawio/extension.py +3 -3
  13. md2conf/drawio/render.py +2 -0
  14. md2conf/extension.py +4 -0
  15. md2conf/external.py +25 -8
  16. md2conf/frontmatter.py +18 -6
  17. md2conf/image.py +17 -14
  18. md2conf/latex.py +8 -1
  19. md2conf/markdown.py +68 -1
  20. md2conf/mermaid/render.py +1 -1
  21. md2conf/options.py +95 -24
  22. md2conf/plantuml/extension.py +7 -7
  23. md2conf/plantuml/render.py +6 -7
  24. md2conf/png.py +10 -6
  25. md2conf/processor.py +24 -3
  26. md2conf/publisher.py +193 -36
  27. md2conf/reflection.py +74 -0
  28. md2conf/scanner.py +16 -6
  29. md2conf/serializer.py +12 -1
  30. md2conf/svg.py +131 -109
  31. md2conf/toc.py +72 -0
  32. md2conf/xml.py +45 -0
  33. markdown_to_confluence-0.5.3.dist-info/RECORD +0 -55
  34. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt +0 -0
  35. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE +0 -0
  36. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt +0 -0
  37. {markdown_to_confluence-0.5.3.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe +0 -0
  38. /md2conf/{puppeteer-config.json → mermaid/puppeteer-config.json} +0 -0
@@ -19,7 +19,7 @@ from md2conf.compatibility import override, path_relative_to
19
19
  from md2conf.csf import AC_ATTR, AC_ELEM
20
20
  from md2conf.extension import MarketplaceExtension
21
21
  from md2conf.formatting import ImageAttributes
22
- from md2conf.svg import get_svg_dimensions_from_bytes
22
+ from md2conf.svg import get_svg_dimensions
23
23
 
24
24
  from .config import PlantUMLConfigProperties
25
25
  from .render import compress_plantuml_data, has_plantuml, render_diagram
@@ -87,7 +87,7 @@ class PlantUMLExtension(MarketplaceExtension):
87
87
  image_data = render_diagram(content, "svg", config=config)
88
88
 
89
89
  # extract dimensions from SVG
90
- width, height = get_svg_dimensions_from_bytes(image_data)
90
+ dimensions = get_svg_dimensions(image_data)
91
91
 
92
92
  # generate SVG filename and add as attachment
93
93
  if relative_path is not None:
@@ -98,11 +98,11 @@ class PlantUMLExtension(MarketplaceExtension):
98
98
  svg_filename = attachment_name(f"embedded_{plantuml_hash}.svg")
99
99
  self.attachments.add_embed(svg_filename, EmbeddedFileData(image_data))
100
100
 
101
- return self._create_plantuml_macro(content, svg_filename, width, height)
101
+ return self._create_plantuml_macro(content, svg_filename, dimensions)
102
102
  else:
103
103
  return self._create_plantuml_macro(content)
104
104
 
105
- def _create_plantuml_macro(self, source: str, filename: str | None = None, width: int | None = None, height: int | None = None) -> ElementType:
105
+ def _create_plantuml_macro(self, source: str, filename: str | None = None, dimensions: tuple[int, int] | None = None) -> ElementType:
106
106
  """
107
107
  A PlantUML diagram using a `structured-macro` with embedded data.
108
108
 
@@ -128,7 +128,8 @@ class PlantUMLExtension(MarketplaceExtension):
128
128
  parameters.append(AC_ELEM("parameter", {AC_ATTR("name"): "filename"}, filename))
129
129
 
130
130
  # add optional dimension parameters if available
131
- if width is not None:
131
+ if dimensions is not None:
132
+ width, height = dimensions
132
133
  parameters.append(
133
134
  AC_ELEM(
134
135
  "parameter",
@@ -136,7 +137,6 @@ class PlantUMLExtension(MarketplaceExtension):
136
137
  str(width),
137
138
  )
138
139
  )
139
- if height is not None:
140
140
  parameters.append(
141
141
  AC_ELEM(
142
142
  "parameter",
@@ -148,7 +148,7 @@ class PlantUMLExtension(MarketplaceExtension):
148
148
  return AC_ELEM(
149
149
  "structured-macro",
150
150
  {
151
- AC_ATTR("name"): "plantumlcloud",
151
+ AC_ATTR("name"): "plantumlcloud", # spellchecker:disable-line
152
152
  AC_ATTR("schema-version"): "1",
153
153
  "data-layout": "default",
154
154
  AC_ATTR("local-id"): local_id,
@@ -92,17 +92,16 @@ def render_diagram(
92
92
  if config is None:
93
93
  config = PlantUMLConfigProperties()
94
94
 
95
- # Build command for PlantUML with pipe mode
96
- # -pipe: read from stdin and write to stdout
97
- # -t<format>: output format (png or svg)
98
- # -charset utf-8: ensure UTF-8 encoding
95
+ # command for PlantUML with pipe mode
99
96
  cmd = _get_plantuml_command()
100
97
  cmd.extend(
101
98
  [
102
- "-pipe",
103
- f"-t{output_format}",
104
- "-charset",
99
+ "--charset",
105
100
  "utf-8",
101
+ "--format",
102
+ output_format,
103
+ "--no-error-image",
104
+ "--pipe",
106
105
  ]
107
106
  )
108
107
 
md2conf/png.py CHANGED
@@ -12,6 +12,10 @@ from struct import unpack
12
12
  from typing import BinaryIO, Iterable, overload
13
13
 
14
14
 
15
+ class ImageFormatError(RuntimeError):
16
+ pass
17
+
18
+
15
19
  class _Chunk:
16
20
  "Data chunk in binary data as per the PNG image format."
17
21
 
@@ -34,7 +38,7 @@ def _read_signature(f: BinaryIO) -> None:
34
38
 
35
39
  signature = f.read(8)
36
40
  if signature != b"\x89PNG\r\n\x1a\n":
37
- raise ValueError("not a valid PNG file")
41
+ raise ImageFormatError("not a valid PNG file")
38
42
 
39
43
 
40
44
  def _read_chunk(f: BinaryIO) -> _Chunk | None:
@@ -45,7 +49,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
45
49
  return None
46
50
 
47
51
  if len(length_bytes) != 4:
48
- raise ValueError("expected: 4 bytes storing chunk length")
52
+ raise ImageFormatError("expected: 4 bytes storing chunk length")
49
53
 
50
54
  length = int.from_bytes(length_bytes, "big")
51
55
 
@@ -53,7 +57,7 @@ def _read_chunk(f: BinaryIO) -> _Chunk | None:
53
57
  data_bytes = f.read(data_length)
54
58
  actual_length = len(data_bytes)
55
59
  if actual_length != data_length:
56
- raise ValueError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
60
+ raise ImageFormatError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
57
61
 
58
62
  chunk_type = data_bytes[0:4]
59
63
  chunk_data = data_bytes[4:-4]
@@ -75,12 +79,12 @@ def _extract_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
75
79
  # validate IHDR (Image Header) chunk
76
80
  ihdr = _read_chunk(source_file)
77
81
  if ihdr is None:
78
- raise ValueError("missing IHDR chunk")
82
+ raise ImageFormatError("missing IHDR chunk")
79
83
 
80
84
  if ihdr.length != 13:
81
- raise ValueError("invalid chunk length")
85
+ raise ImageFormatError("invalid chunk length")
82
86
  if ihdr.name != b"IHDR":
83
- raise ValueError(f"expected: IHDR chunk; got: {ihdr.name!r}")
87
+ raise ImageFormatError(f"expected: IHDR chunk; got: {ihdr.name!r}")
84
88
 
85
89
  (
86
90
  width,
md2conf/processor.py CHANGED
@@ -15,11 +15,12 @@ from typing import Iterable
15
15
 
16
16
  from .collection import ConfluencePageCollection
17
17
  from .converter import ConfluenceDocument
18
- from .environment import ArgumentError
18
+ from .environment import ArgumentError, PageError
19
19
  from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
20
20
  from .metadata import ConfluenceSiteMetadata
21
21
  from .options import ConfluencePageID, DocumentOptions
22
22
  from .scanner import Scanner
23
+ from .toc import unique_title
23
24
 
24
25
  LOGGER = logging.getLogger(__name__)
25
26
 
@@ -143,6 +144,22 @@ class Processor:
143
144
  Processes a sub-tree rooted at an ancestor node.
144
145
  """
145
146
 
147
+ # verify if pages have a unique title to avoid overwrites within synchronized set
148
+ title_to_path: dict[str, Path] = {}
149
+ duplicates: set[Path] = set()
150
+ for node in root.all():
151
+ if node.title is not None:
152
+ path = title_to_path.get(node.title)
153
+ if path is not None:
154
+ duplicates.add(path)
155
+ duplicates.add(node.absolute_path)
156
+ else:
157
+ title_to_path[node.title] = node.absolute_path
158
+ if duplicates:
159
+ raise PageError(
160
+ f"expected: each synchronized page to have a unique title but duplicates found in files: {', '.join(str(p) for p in sorted(list(duplicates)))}"
161
+ )
162
+
146
163
  # synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
147
164
  self._synchronize_tree(root, self.options.root_page_id)
148
165
 
@@ -246,14 +263,18 @@ class Processor:
246
263
  LOGGER.info("Indexing file: %s", path)
247
264
 
248
265
  # extract information from a Markdown document found in a local directory.
249
- document = Scanner().read(path)
266
+ with open(path, "r", encoding="utf-8") as f:
267
+ text = f.read()
250
268
 
269
+ document = Scanner().parse(text)
251
270
  props = document.properties
271
+ title = props.title or unique_title(text)
272
+
252
273
  return DocumentNode(
253
274
  absolute_path=path,
254
275
  page_id=props.page_id,
255
276
  space_key=props.space_key,
256
- title=props.title,
277
+ title=title,
257
278
  synchronized=props.synchronized if props.synchronized is not None else True,
258
279
  )
259
280
 
md2conf/publisher.py CHANGED
@@ -6,23 +6,101 @@ Copyright 2022-2026, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
+ import hashlib
9
10
  import logging
11
+ from dataclasses import dataclass
10
12
  from pathlib import Path
11
13
 
12
- from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
14
+ from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluencePage, ConfluenceSession, ConfluenceStatus
13
15
  from .attachment import attachment_name
14
16
  from .compatibility import override, path_relative_to
15
- from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
17
+ from .converter import ConfluenceDocument, ElementType, get_volatile_attributes, get_volatile_elements
16
18
  from .csf import AC_ATTR, elements_from_string
17
19
  from .environment import PageError
18
20
  from .metadata import ConfluencePageMetadata
19
21
  from .options import ConfluencePageID, DocumentOptions
20
22
  from .processor import Converter, DocumentNode, Processor, ProcessorFactory
23
+ from .serializer import json_to_object, object_to_json
21
24
  from .xml import is_xml_equal, unwrap_substitute
22
25
 
23
26
  LOGGER = logging.getLogger(__name__)
24
27
 
25
28
 
29
+ CONTENT_PROPERTY_TAG = "md2conf"
30
+
31
+
32
+ class _MissingType:
33
+ pass
34
+
35
+
36
+ _MissingDefault = _MissingType()
37
+
38
+
39
+ class ParentCatalog:
40
+ "Maintains a catalog of child-parent relationships."
41
+
42
+ _api: ConfluenceSession
43
+ _child_to_parent: dict[str, str | None]
44
+ _known: set[str]
45
+
46
+ def __init__(self, api: ConfluenceSession) -> None:
47
+ self._api = api
48
+ self._child_to_parent = {}
49
+ self._known = set()
50
+
51
+ def add_known(self, page_id: str) -> None:
52
+ """
53
+ Adds a new well-known page such as the root page or a page paired with a Markdown file using an explicit page ID.
54
+ """
55
+
56
+ self._known.add(page_id)
57
+
58
+ def add_parent(self, *, page_id: str, parent_id: str | None) -> None:
59
+ """
60
+ Adds a new child-parent relationship.
61
+
62
+ This method is useful to persist information acquired by a previous API call.
63
+ """
64
+
65
+ self._child_to_parent[page_id] = parent_id
66
+
67
+ def is_traceable(self, page_id: str) -> bool:
68
+ """
69
+ Verifies if a page traces back to a well-known root page.
70
+
71
+ :param page_id: The page to check.
72
+ """
73
+
74
+ if page_id in self._known:
75
+ return True
76
+
77
+ known_parent_id = self._child_to_parent.get(page_id, _MissingDefault)
78
+ if not isinstance(known_parent_id, _MissingType):
79
+ parent_id = known_parent_id
80
+ else:
81
+ page = self._api.get_page_properties(page_id)
82
+ parent_id = page.parentId
83
+ self._child_to_parent[page_id] = parent_id
84
+
85
+ if parent_id is None:
86
+ return False
87
+
88
+ return self.is_traceable(parent_id)
89
+
90
+
91
+ @dataclass
92
+ class ConfluenceMarkdownTag:
93
+ """
94
+ Captures information used to synchronize the Markdown source file with the Confluence target page.
95
+
96
+ :param page_version: Confluence page version number when the page was last synchronized.
97
+ :param source_digest: MD5 hash computed from the Markdown source file.
98
+ """
99
+
100
+ page_version: int
101
+ source_digest: str
102
+
103
+
26
104
  class SynchronizingProcessor(Processor):
27
105
  """
28
106
  Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -59,14 +137,18 @@ class SynchronizingProcessor(Processor):
59
137
  elif root_id is not None:
60
138
  real_id = root_id
61
139
  else:
62
- raise NotImplementedError("condition not exhaustive")
140
+ raise NotImplementedError("condition not exhaustive for synchronizing tree")
63
141
 
64
- self._synchronize_subtree(tree, real_id)
142
+ catalog = ParentCatalog(self.api)
143
+ catalog.add_known(real_id.page_id)
144
+ self._synchronize_subtree(tree, real_id, catalog)
65
145
 
66
- def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID) -> None:
146
+ def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID, catalog: ParentCatalog) -> None:
67
147
  if node.page_id is not None:
68
148
  # verify if page exists
69
149
  page = self.api.get_page_properties(node.page_id)
150
+ catalog.add_known(page.id)
151
+ catalog.add_parent(page_id=page.id, parent_id=page.parentId)
70
152
  update = False
71
153
  else:
72
154
  if node.title is not None:
@@ -77,20 +159,26 @@ class SynchronizingProcessor(Processor):
77
159
  digest = self._generate_hash(node.absolute_path)
78
160
  title = f"{node.absolute_path.stem} [{digest}]"
79
161
 
80
- if self.options.title_prefix is not None:
81
- title = f"{self.options.title_prefix} {title}"
162
+ title = self._get_extended_title(title)
82
163
 
83
164
  # look up page by (possibly auto-generated) title
84
165
  page = self.api.get_or_create_page(title, parent_id.page_id)
166
+ catalog.add_parent(page_id=page.id, parent_id=page.parentId)
85
167
 
86
168
  if page.status is ConfluenceStatus.ARCHIVED:
87
- # user has archived a page with this (auto-generated) title
88
- raise PageError(f"unable to update archived page with ID {page.id}")
169
+ # user has archived a page with this (possibly auto-generated) title
170
+ raise PageError(f"unable to update archived page with ID {page.id} when synchronizing {node.absolute_path}")
171
+
172
+ if not catalog.is_traceable(page.id):
173
+ raise PageError(
174
+ f"expected: page with ID {page.id} to be a descendant of the root page or one of the pages paired with a Markdown file using an explicit "
175
+ f"page ID when synchronizing {node.absolute_path}"
176
+ )
89
177
 
90
178
  update = True
91
179
 
92
180
  space_key = self.api.space_id_to_key(page.spaceId)
93
- if update:
181
+ if update and not self.options.skip_update:
94
182
  self._update_markdown(
95
183
  node.absolute_path,
96
184
  page_id=page.id,
@@ -106,7 +194,7 @@ class SynchronizingProcessor(Processor):
106
194
  self.page_metadata.add(node.absolute_path, data)
107
195
 
108
196
  for child_node in node.children():
109
- self._synchronize_subtree(child_node, ConfluencePageID(page.id))
197
+ self._synchronize_subtree(child_node, ConfluencePageID(page.id), catalog)
110
198
 
111
199
  @override
112
200
  def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
@@ -136,48 +224,117 @@ class SynchronizingProcessor(Processor):
136
224
  content = document.xhtml()
137
225
  LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
138
226
 
139
- title = None
140
- if document.title is not None:
141
- meta = self.page_metadata.get(path)
142
- if meta is not None and meta.title != document.title:
143
- conflicting_page_id = self.api.page_exists(document.title, space_id=self.api.space_key_to_id(meta.space_key))
144
- if conflicting_page_id is None:
145
- title = document.title
146
- else:
147
- LOGGER.info(
148
- "Document title of %s conflicts with Confluence page title of %s",
149
- path,
150
- conflicting_page_id,
151
- )
227
+ # compute content hash to help detect if document has changed
228
+ m = hashlib.md5()
229
+ with open(path, "rb") as f:
230
+ m.update(f.read())
231
+ source_digest = m.hexdigest()
232
+
233
+ # set Confluence title based on Markdown content
234
+ title = self._get_unique_title(document, path)
152
235
 
153
236
  # fetch existing page
154
237
  page = self.api.get_page(page_id.page_id)
238
+ prop = self.api.get_content_property_for_page(page_id.page_id, CONTENT_PROPERTY_TAG)
239
+ tag: ConfluenceMarkdownTag | None = None
240
+ if prop is not None:
241
+ try:
242
+ tag = json_to_object(ConfluenceMarkdownTag, prop.value)
243
+ LOGGER.debug("Page with ID %s has last synchronized version of %d and hash of %s", page.id, tag.page_version, tag.source_digest)
244
+ except Exception:
245
+ pass
246
+
247
+ # keep the existing Confluence title if a meaningful title cannot be inferred from the Markdown source
155
248
  if not title: # empty or `None`
156
249
  title = page.title
157
250
 
251
+ # synchronize page if page has any changes
252
+ if self._has_changes(page, tag, title, document.root, source_digest):
253
+ if tag is not None and page.version.number != tag.page_version:
254
+ LOGGER.warning("Page with ID %s has been edited since last synchronized: %s", page.id, page.title)
255
+
256
+ relative_path = path_relative_to(path, self.root_dir)
257
+ version = page.version.number + 1
258
+ self.api.update_page(page.id, content, title=title, version=version, message=f"Synchronized by md2conf from Markdown file: {relative_path}")
259
+ else:
260
+ version = page.version.number
261
+
262
+ if document.labels is not None:
263
+ self.api.update_labels(
264
+ page.id,
265
+ [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
266
+ )
267
+
268
+ props = [ConfluenceContentProperty(CONTENT_PROPERTY_TAG, object_to_json(ConfluenceMarkdownTag(version, source_digest)))]
269
+ if document.properties is not None:
270
+ props.extend(ConfluenceContentProperty(key, value) for key, value in document.properties.items())
271
+ self.api.update_content_properties_for_page(page.id, props)
272
+ else:
273
+ if tag is None or tag.page_version != version:
274
+ self.api.update_content_properties_for_page(page.id, props, keep_existing=True)
275
+
276
+ def _has_changes(self, page: ConfluencePage, tag: ConfluenceMarkdownTag | None, title: str, root: ElementType, source_digest: str) -> bool:
277
+ "True if the page needs to be updated: the title or the Markdown source digest has changed, or the generated Confluence Storage Format content differs from the Confluence target page content."
278
+
279
+ if page.title != title:
280
+ LOGGER.info("Detected page with new title: %s", page.id)
281
+ return True
282
+
283
+ if tag is not None and tag.source_digest != source_digest:
284
+ LOGGER.info("Detected page with updated Markdown source: %s", page.id)
285
+ return True
286
+
158
287
  # discard comments
159
288
  tree = elements_from_string(page.content)
160
289
  unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)
161
290
 
162
- # check if page has any changes
163
- if page.title != title or not is_xml_equal(
164
- document.root,
291
+ # visit XML nodes recursively
292
+ if not is_xml_equal(
293
+ root,
165
294
  tree,
166
295
  skip_attributes=get_volatile_attributes(),
167
296
  skip_elements=get_volatile_elements(),
168
297
  ):
169
- self.api.update_page(page_id.page_id, content, title=title, version=page.version.number + 1)
298
+ LOGGER.info("Detected page with updated Markdown content: %s", page.id)
299
+ return True
300
+
301
+ LOGGER.info("Up-to-date page: %s", page.id)
302
+ return False
303
+
304
+ def _get_extended_title(self, title: str) -> str:
305
+ """
306
+ Returns a title with the title prefix applied (if any).
307
+ """
308
+
309
+ if self.options.title_prefix is not None:
310
+ return f"{self.options.title_prefix} {title}"
170
311
  else:
171
- LOGGER.info("Up-to-date page: %s", page_id.page_id)
312
+ return title
172
313
 
173
- if document.labels is not None:
174
- self.api.update_labels(
175
- page_id.page_id,
176
- [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
177
- )
314
+ def _get_unique_title(self, document: ConfluenceDocument, path: Path) -> str | None:
315
+ """
316
+ Determines the (new) document title to assign to the Confluence page.
178
317
 
179
- if document.properties is not None:
180
- self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
318
+ Ensures that the title is unique across the Confluence space.
319
+ """
320
+
321
+ # document has no title (neither in front-matter nor as unique top-level heading)
322
+ if document.title is None:
323
+ return None
324
+
325
+ # add configured title prefix
326
+ title = self._get_extended_title(document.title)
327
+
328
+ # compare current document title with title discovered during directory traversal
329
+ meta = self.page_metadata.get(path)
330
+ if meta is not None and meta.title != title:
331
+ # title has changed, check if new title is available
332
+ page_id = self.api.page_exists(title, space_id=self.api.space_key_to_id(meta.space_key))
333
+ if page_id is not None:
334
+ LOGGER.info("Unrelated Confluence page with ID %s has the same inferred title as the Markdown file: %s", page_id, path)
335
+ return None
336
+
337
+ return title
181
338
 
182
339
  def _update_markdown(self, path: Path, *, page_id: str, space_key: str) -> None:
183
340
  """
md2conf/reflection.py ADDED
@@ -0,0 +1,74 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2026, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+ from dataclasses import fields, is_dataclass
11
+ from types import NoneType, UnionType
12
+ from typing import Any, Literal, Union, get_args, get_origin, get_type_hints
13
+
14
+
15
+ def get_nested_types(items: Sequence[Any]) -> set[type[Any]]:
16
+ "Returns a set of types that are directly or indirectly referenced by any of the specified items."
17
+
18
+ tps: set[type[Any]] = set()
19
+ for item in items:
20
+ tps.update(_get_nested_types(item))
21
+ return tps
22
+
23
+
24
+ def _get_nested_types(tp: Any) -> set[type[Any]]:
25
+ tps: set[type[Any]] = set()
26
+ if tp is not None and tp is not NoneType:
27
+ origin = get_origin(tp)
28
+ if origin is list:
29
+ (item_type,) = get_args(tp)
30
+ tps.update(_get_nested_types(item_type))
31
+ elif origin is dict:
32
+ key_type, value_type = get_args(tp)
33
+ tps.update(_get_nested_types(key_type))
34
+ tps.update(_get_nested_types(value_type))
35
+ elif origin is set:
36
+ (elem_type,) = get_args(tp)
37
+ tps.update(_get_nested_types(elem_type))
38
+ elif origin is UnionType or origin is Union:
39
+ for union_arg in get_args(tp):
40
+ tps.update(_get_nested_types(union_arg))
41
+ elif isinstance(tp, type):
42
+ tps.add(tp)
43
+ if is_dataclass(tp):
44
+ for field in fields(tp):
45
+ tps.update(_get_nested_types(field.type))
46
+ elif isinstance(tp, type): # required to please static type checkers
47
+ for field_type in get_type_hints(tp).values():
48
+ tps.update(_get_nested_types(field_type))
49
+ return tps
50
+
51
+
52
+ def format_initializer(tp: Any) -> str:
53
+ "Returns a string that shows an initialization value for a type."
54
+
55
+ origin = get_origin(tp)
56
+ if tp is None or tp is NoneType:
57
+ return "None"
58
+ elif origin is list:
59
+ (item_type,) = get_args(tp)
60
+ return f"[{format_initializer(item_type)}]"
61
+ elif origin is dict:
62
+ key_type, value_type = get_args(tp)
63
+ return f"{{{format_initializer(key_type)}: {format_initializer(value_type)}}}"
64
+ elif origin is set:
65
+ (elem_type,) = get_args(tp)
66
+ return f"[{format_initializer(elem_type)}]"
67
+ elif origin is Literal:
68
+ return " or ".join(repr(arg) for arg in get_args(tp))
69
+ elif origin is UnionType or origin is Union:
70
+ return " or ".join(format_initializer(arg) for arg in get_args(tp))
71
+ elif isinstance(tp, type):
72
+ return f"{tp.__name__}()"
73
+ else:
74
+ return "..."
md2conf/scanner.py CHANGED
@@ -63,10 +63,12 @@ class ScannedDocument:
63
63
 
64
64
  :param properties: Properties extracted from the front-matter of a Markdown document.
65
65
  :param text: Text that remains after front-matter and inline properties have been extracted.
66
+ :param start_line_number: Line number of the first line of the Markdown document excluding front-matter, or 1 if there is no front-matter.
66
67
  """
67
68
 
68
69
  properties: DocumentProperties
69
70
  text: str
71
+ start_line_number: int
70
72
 
71
73
 
72
74
  class Scanner:
@@ -75,10 +77,16 @@ class Scanner:
75
77
  Extracts essential properties from a Markdown document.
76
78
  """
77
79
 
78
- # parse file
79
80
  with open(absolute_path, "r", encoding="utf-8") as f:
80
81
  text = f.read()
81
82
 
83
+ return self.parse(text)
84
+
85
+ def parse(self, text: str) -> ScannedDocument:
86
+ """
87
+ Extracts essential properties from a Markdown document.
88
+ """
89
+
82
90
  # extract Confluence page ID
83
91
  page_id, text = extract_value(r"<!--\s+confluence[-_]page[-_]id:\s*(\d+)\s+-->", text)
84
92
 
@@ -91,16 +99,18 @@ class Scanner:
91
99
  body_props = DocumentProperties(page_id=page_id, space_key=space_key, generated_by=generated_by)
92
100
 
93
101
  # extract front-matter
94
- data, text = extract_frontmatter_json(text)
95
- if data is not None:
96
- frontmatter_props = json_to_object(DocumentProperties, data)
97
- alias_props = json_to_object(AliasProperties, data)
102
+ frontmatter, text = extract_frontmatter_json(text)
103
+ if frontmatter is not None:
104
+ frontmatter_props = json_to_object(DocumentProperties, frontmatter.data)
105
+ alias_props = json_to_object(AliasProperties, frontmatter.data)
98
106
  if alias_props.confluence_page_id is not None:
99
107
  frontmatter_props.page_id = alias_props.confluence_page_id
100
108
  if alias_props.confluence_space_key is not None:
101
109
  frontmatter_props.space_key = alias_props.confluence_space_key
102
110
  props = coalesce(body_props, frontmatter_props)
111
+ start_line_number = frontmatter.outer_line_count + 1
103
112
  else:
104
113
  props = body_props
114
+ start_line_number = 1
105
115
 
106
- return ScannedDocument(properties=props, text=text)
116
+ return ScannedDocument(properties=props, text=text, start_line_number=start_line_number)
md2conf/serializer.py CHANGED
@@ -8,7 +8,7 @@ Copyright 2022-2026, Levente Hunyadi
8
8
 
9
9
  import sys
10
10
  from datetime import datetime
11
- from typing import TypeVar
11
+ from typing import TypeVar, cast
12
12
 
13
13
  from cattrs.preconf.orjson import make_converter # spellchecker:disable-line
14
14
 
@@ -53,6 +53,17 @@ def json_to_object(typ: type[T], data: JsonType) -> T:
53
53
  return _converter.structure(data, typ)
54
54
 
55
55
 
56
+ def object_to_json(data: object) -> JsonType:
57
+ """
58
+ Converts a structured object to a JSON object, ready to be serialized to a JSON string.
59
+
60
+ :param data: Python object to convert to a JSON object.
61
+ :returns: JSON object, ready to be serialized to a JSON string encoded in UTF-8.
62
+ """
63
+
64
+ return cast(JsonType, _converter.unstructure(data))
65
+
66
+
56
67
  def object_to_json_payload(data: object) -> bytes:
57
68
  """
58
69
  Converts a structured object to a JSON string encoded in UTF-8.