markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/publisher.py CHANGED
@@ -6,23 +6,29 @@ Copyright 2022-2026, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
+ import hashlib
9
10
  import logging
11
+ from dataclasses import dataclass
10
12
  from pathlib import Path
11
13
 
12
- from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
14
+ from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluencePage, ConfluenceSession, ConfluenceStatus
13
15
  from .attachment import attachment_name
14
16
  from .compatibility import override, path_relative_to
15
- from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
17
+ from .converter import ConfluenceDocument, ElementType, get_volatile_attributes, get_volatile_elements
16
18
  from .csf import AC_ATTR, elements_from_string
17
19
  from .environment import PageError
18
20
  from .metadata import ConfluencePageMetadata
19
21
  from .options import ConfluencePageID, DocumentOptions
20
22
  from .processor import Converter, DocumentNode, Processor, ProcessorFactory
23
+ from .serializer import json_to_object, object_to_json
21
24
  from .xml import is_xml_equal, unwrap_substitute
22
25
 
23
26
  LOGGER = logging.getLogger(__name__)
24
27
 
25
28
 
29
+ CONTENT_PROPERTY_TAG = "md2conf"
30
+
31
+
26
32
  class _MissingType:
27
33
  pass
28
34
 
@@ -82,6 +88,19 @@ class ParentCatalog:
82
88
  return self.is_traceable(parent_id)
83
89
 
84
90
 
91
@dataclass
class ConfluenceMarkdownTag:
    """
    Synchronization marker that ties a Markdown source file to its Confluence target page.

    :param page_version: Version number the Confluence page had when it was last synchronized.
    :param source_digest: MD5 digest of the Markdown source file.
    """

    page_version: int
    source_digest: str
102
+
103
+
85
104
  class SynchronizingProcessor(Processor):
86
105
  """
87
106
  Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -205,36 +224,82 @@ class SynchronizingProcessor(Processor):
205
224
  content = document.xhtml()
206
225
  LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
207
226
 
227
+ # compute content hash to help detect if document has changed
228
+ m = hashlib.md5()
229
+ with open(path, "rb") as f:
230
+ m.update(f.read())
231
+ source_digest = m.hexdigest()
232
+
233
+ # set Confluence title based on Markdown content
208
234
  title = self._get_unique_title(document, path)
209
235
 
210
236
  # fetch existing page
211
237
  page = self.api.get_page(page_id.page_id)
238
+ prop = self.api.get_content_property_for_page(page_id.page_id, CONTENT_PROPERTY_TAG)
239
+ tag: ConfluenceMarkdownTag | None = None
240
+ if prop is not None:
241
+ try:
242
+ tag = json_to_object(ConfluenceMarkdownTag, prop.value)
243
+ LOGGER.debug("Page with ID %s has last synchronized version of %d and hash of %s", page.id, tag.page_version, tag.source_digest)
244
+ except Exception:
245
+ pass
246
+
247
+ # keep existing Confluence title if cannot infer meaningful title from Markdown source
212
248
  if not title: # empty or `None`
213
249
  title = page.title
214
250
 
215
- # discard comments
216
- tree = elements_from_string(page.content)
217
- unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)
251
+ # synchronize page if page has any changes
252
+ if self._has_changes(page, tag, title, document.root, source_digest):
253
+ if tag is not None and page.version.number != tag.page_version:
254
+ LOGGER.warning("Page with ID %s has been edited since last synchronized: %s", page.id, page.title)
218
255
 
219
- # check if page has any changes
220
- if page.title != title or not is_xml_equal(
221
- document.root,
222
- tree,
223
- skip_attributes=get_volatile_attributes(),
224
- skip_elements=get_volatile_elements(),
225
- ):
226
- self.api.update_page(page_id.page_id, content, title=title, version=page.version.number + 1)
256
+ relative_path = path_relative_to(path, self.root_dir)
257
+ version = page.version.number + 1
258
+ self.api.update_page(page.id, content, title=title, version=version, message=f"Synchronized by md2conf from Markdown file: {relative_path}")
227
259
  else:
228
- LOGGER.info("Up-to-date page: %s", page_id.page_id)
260
+ version = page.version.number
229
261
 
230
262
  if document.labels is not None:
231
263
  self.api.update_labels(
232
- page_id.page_id,
264
+ page.id,
233
265
  [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
234
266
  )
235
267
 
268
+ props = [ConfluenceContentProperty(CONTENT_PROPERTY_TAG, object_to_json(ConfluenceMarkdownTag(version, source_digest)))]
236
269
  if document.properties is not None:
237
- self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
270
+ props.extend(ConfluenceContentProperty(key, value) for key, value in document.properties.items())
271
+ self.api.update_content_properties_for_page(page.id, props)
272
+ else:
273
+ if tag is None or tag.page_version != version:
274
+ self.api.update_content_properties_for_page(page.id, props, keep_existing=True)
275
+
276
def _has_changes(self, page: ConfluencePage, tag: ConfluenceMarkdownTag | None, title: str, root: ElementType, source_digest: str) -> bool:
    """
    True if the Confluence target page differs from what the Markdown source file produces and therefore needs to be updated.

    A change is reported when the page title differs from the desired title, when a synchronization tag is present and its
    source digest differs from the current one, or when the generated content is not equal to the existing page content.

    :param page: Confluence target page to compare against.
    :param tag: Synchronization tag whose source digest is compared, or `None` to skip the digest comparison.
    :param title: Title the page is expected to have.
    :param root: Root element of the content generated from the Markdown source file.
    :param source_digest: Digest of the current Markdown source file.
    :returns: `True` if a change has been detected, `False` if the page is up-to-date.
    """

    if page.title != title:
        LOGGER.info("Detected page with new title: %s", page.id)
        return True

    # a differing digest is sufficient evidence of change; the content comparison below is skipped
    if tag is not None and tag.source_digest != source_digest:
        LOGGER.info("Detected page with updated Markdown source: %s", page.id)
        return True

    # discard comments: inline comment markers are replaced by their contents before comparing
    tree = elements_from_string(page.content)
    unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)

    # visit XML nodes recursively, ignoring volatile attributes and elements
    if not is_xml_equal(
        root,
        tree,
        skip_attributes=get_volatile_attributes(),
        skip_elements=get_volatile_elements(),
    ):
        LOGGER.info("Detected page with updated Markdown content: %s", page.id)
        return True

    LOGGER.info("Up-to-date page: %s", page.id)
    return False
238
303
 
239
304
  def _get_extended_title(self, title: str) -> str:
240
305
  """
md2conf/reflection.py ADDED
@@ -0,0 +1,74 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2026, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+ from dataclasses import fields, is_dataclass
11
+ from types import NoneType, UnionType
12
+ from typing import Any, Literal, Union, get_args, get_origin, get_type_hints
13
+
14
+
15
def get_nested_types(items: Sequence[Any]) -> set[type[Any]]:
    "Collects every type that is reachable, directly or transitively, from any of the specified items."

    # union of the per-item results; an empty sequence yields an empty set
    return set().union(*(_get_nested_types(item) for item in items))
22
+
23
+
24
+ def _get_nested_types(tp: Any) -> set[type[Any]]:
25
+ tps: set[type[Any]] = set()
26
+ if tp is not None and tp is not NoneType:
27
+ origin = get_origin(tp)
28
+ if origin is list:
29
+ (item_type,) = get_args(tp)
30
+ tps.update(_get_nested_types(item_type))
31
+ elif origin is dict:
32
+ key_type, value_type = get_args(tp)
33
+ tps.update(_get_nested_types(key_type))
34
+ tps.update(_get_nested_types(value_type))
35
+ elif origin is set:
36
+ (elem_type,) = get_args(tp)
37
+ tps.update(_get_nested_types(elem_type))
38
+ elif origin is UnionType or origin is Union:
39
+ for union_arg in get_args(tp):
40
+ tps.update(_get_nested_types(union_arg))
41
+ elif isinstance(tp, type):
42
+ tps.add(tp)
43
+ if is_dataclass(tp):
44
+ for field in fields(tp):
45
+ tps.update(_get_nested_types(field.type))
46
+ elif isinstance(tp, type): # required to please static type checkers
47
+ for field_type in get_type_hints(tp).values():
48
+ tps.update(_get_nested_types(field_type))
49
+ return tps
50
+
51
+
52
+ def format_initializer(tp: Any) -> str:
53
+ "Prints an initialization value for a type."
54
+
55
+ origin = get_origin(tp)
56
+ if tp is None or tp is NoneType:
57
+ return "None"
58
+ elif origin is list:
59
+ (item_type,) = get_args(tp)
60
+ return f"[{format_initializer(item_type)}]"
61
+ elif origin is dict:
62
+ key_type, value_type = get_args(tp)
63
+ return f"{{{format_initializer(key_type)}: {format_initializer(value_type)}}}"
64
+ elif origin is set:
65
+ (elem_type,) = get_args(tp)
66
+ return f"[{format_initializer(elem_type)}]"
67
+ elif origin is Literal:
68
+ return " or ".join(repr(arg) for arg in get_args(tp))
69
+ elif origin is UnionType or origin is Union:
70
+ return " or ".join(format_initializer(arg) for arg in get_args(tp))
71
+ elif isinstance(tp, type):
72
+ return f"{tp.__name__}()"
73
+ else:
74
+ return "..."
md2conf/scanner.py CHANGED
@@ -63,10 +63,12 @@ class ScannedDocument:
63
63
 
64
64
  :param properties: Properties extracted from the front-matter of a Markdown document.
65
65
  :param text: Text that remains after front-matter and inline properties have been extracted.
66
+ :param start_line_number: Line number of the first line of the Markdown document that follows the front-matter, or 1 if there is no front-matter.
66
67
  """
67
68
 
68
69
  properties: DocumentProperties
69
70
  text: str
71
+ start_line_number: int
70
72
 
71
73
 
72
74
  class Scanner:
@@ -97,16 +99,18 @@ class Scanner:
97
99
  body_props = DocumentProperties(page_id=page_id, space_key=space_key, generated_by=generated_by)
98
100
 
99
101
  # extract front-matter
100
- data, text = extract_frontmatter_json(text)
101
- if data is not None:
102
- frontmatter_props = json_to_object(DocumentProperties, data)
103
- alias_props = json_to_object(AliasProperties, data)
102
+ frontmatter, text = extract_frontmatter_json(text)
103
+ if frontmatter is not None:
104
+ frontmatter_props = json_to_object(DocumentProperties, frontmatter.data)
105
+ alias_props = json_to_object(AliasProperties, frontmatter.data)
104
106
  if alias_props.confluence_page_id is not None:
105
107
  frontmatter_props.page_id = alias_props.confluence_page_id
106
108
  if alias_props.confluence_space_key is not None:
107
109
  frontmatter_props.space_key = alias_props.confluence_space_key
108
110
  props = coalesce(body_props, frontmatter_props)
111
+ start_line_number = frontmatter.outer_line_count + 1
109
112
  else:
110
113
  props = body_props
114
+ start_line_number = 1
111
115
 
112
- return ScannedDocument(properties=props, text=text)
116
+ return ScannedDocument(properties=props, text=text, start_line_number=start_line_number)
md2conf/serializer.py CHANGED
@@ -8,7 +8,7 @@ Copyright 2022-2026, Levente Hunyadi
8
8
 
9
9
  import sys
10
10
  from datetime import datetime
11
- from typing import TypeVar
11
+ from typing import TypeVar, cast
12
12
 
13
13
  from cattrs.preconf.orjson import make_converter # spellchecker:disable-line
14
14
 
@@ -53,6 +53,17 @@ def json_to_object(typ: type[T], data: JsonType) -> T:
53
53
  return _converter.structure(data, typ)
54
54
 
55
55
 
56
def object_to_json(data: object) -> JsonType:
    """
    Turns a structured Python object into its JSON object representation.

    :param data: Python object to convert to a JSON object.
    :returns: JSON object, ready to be serialized to a JSON string encoded in UTF-8.
    """

    unstructured = _converter.unstructure(data)
    return cast(JsonType, unstructured)
65
+
66
+
56
67
  def object_to_json_payload(data: object) -> bytes:
57
68
  """
58
69
  Converts a structured object to a JSON string encoded in UTF-8.
md2conf/svg.py CHANGED
@@ -254,6 +254,9 @@ def fix_svg_dimensions(data: bytes) -> bytes:
254
254
  return data.replace(original_tag, new_tag, 1)
255
255
 
256
256
 
257
+ _MEASURE_REGEXP = re.compile(r"^([+-]?(?:\d+\.?\d*|\.\d+))(%|px|pt|em|ex|in|cm|mm|pc)?$", re.IGNORECASE)
258
+
259
+
257
260
  def _parse_svg_length(value: str) -> int | None:
258
261
  """
259
262
  Parses an SVG length value and converts it to pixels.
@@ -271,7 +274,7 @@ def _parse_svg_length(value: str) -> int | None:
271
274
  value = value.strip()
272
275
 
273
276
  # Match number with optional unit
274
- match = re.match(r"^([+-]?(?:\d+\.?\d*|\.\d+))(%|px|pt|em|ex|in|cm|mm|pc)?$", value, re.IGNORECASE)
277
+ match = _MEASURE_REGEXP.match(value)
275
278
  if not match:
276
279
  return None
277
280
 
@@ -321,7 +324,7 @@ def _parse_viewbox(viewbox: str) -> tuple[int, int] | None:
321
324
 
322
325
  # viewBox format: "min-x min-y width height"
323
326
  # Values can be separated by whitespace and/or commas
324
- parts = re.split(r"[\s,]+", viewbox.strip())
327
+ parts = re.split(r"\s*,\s*|\s+", viewbox.strip())
325
328
  if len(parts) != 4:
326
329
  return None
327
330
 
md2conf/toc.py CHANGED
@@ -154,7 +154,7 @@ def unique_title(content: str) -> str | None:
154
154
  """
155
155
 
156
156
  builder = TableOfContentsBuilder()
157
- for heading in headings(content.splitlines(keepends=True)):
157
+ for heading in headings(content.splitlines(keepends=True)): # spellchecker:disable-line
158
158
  level, text = heading
159
159
  builder.add(level, text)
160
160
  return builder.get_title()
md2conf/xml.py CHANGED
@@ -106,6 +106,51 @@ def element_to_text(node: ElementType) -> str:
106
106
  return "".join(node.itertext()).strip()
107
107
 
108
108
 
109
def remove_element(child: ElementType) -> None:
    """
    Detaches an element from its parent while keeping the text that trails the element.

    The tail text of the removed element is appended to the tail of the preceding sibling, or to the text of the parent
    if the element is the first child. Does nothing if the element has no parent.

    This function may be unsafe when called in the body of a loop of a live collection iterator, i.e. use

    ```
    for child in list(node): ...
    ```

    instead of

    ```
    for child in node: ...
    ```
    """

    parent = child.getparent()
    if parent is None:
        return

    # text that follows the element (tail text) must survive the removal
    trailing = child.tail
    if trailing:
        position = parent.index(child)
        if position > 0:
            # hand the text over to the preceding sibling
            previous = parent[position - 1]
            previous.tail = (previous.tail or "") + trailing
        else:
            # first child: the text becomes part of the parent's own text
            parent.text = (parent.text or "") + trailing

    parent.remove(child)
152
+
153
+
109
154
  def unwrap_substitute(name: str, root: ElementType) -> None:
110
155
  """
111
156
  Substitutes all occurrences of an element with its contents.