markdown-to-confluence 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/METADATA +258 -157
  2. markdown_to_confluence-0.5.4.dist-info/RECORD +55 -0
  3. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/licenses/LICENSE +1 -1
  4. md2conf/__init__.py +2 -2
  5. md2conf/__main__.py +83 -44
  6. md2conf/api.py +30 -10
  7. md2conf/attachment.py +72 -0
  8. md2conf/coalesce.py +43 -0
  9. md2conf/collection.py +1 -1
  10. md2conf/{extra.py → compatibility.py} +1 -1
  11. md2conf/converter.py +240 -657
  12. md2conf/csf.py +13 -11
  13. md2conf/drawio/__init__.py +0 -0
  14. md2conf/drawio/extension.py +116 -0
  15. md2conf/{drawio.py → drawio/render.py} +1 -1
  16. md2conf/emoticon.py +3 -3
  17. md2conf/environment.py +2 -2
  18. md2conf/extension.py +82 -0
  19. md2conf/external.py +66 -0
  20. md2conf/formatting.py +135 -0
  21. md2conf/frontmatter.py +70 -0
  22. md2conf/image.py +128 -0
  23. md2conf/latex.py +4 -183
  24. md2conf/local.py +8 -8
  25. md2conf/markdown.py +1 -1
  26. md2conf/matcher.py +1 -1
  27. md2conf/mermaid/__init__.py +0 -0
  28. md2conf/mermaid/config.py +20 -0
  29. md2conf/mermaid/extension.py +109 -0
  30. md2conf/{mermaid.py → mermaid/render.py} +10 -38
  31. md2conf/mermaid/scanner.py +55 -0
  32. md2conf/metadata.py +1 -1
  33. md2conf/{domain.py → options.py} +75 -16
  34. md2conf/plantuml/__init__.py +0 -0
  35. md2conf/plantuml/config.py +20 -0
  36. md2conf/plantuml/extension.py +158 -0
  37. md2conf/plantuml/render.py +138 -0
  38. md2conf/plantuml/scanner.py +56 -0
  39. md2conf/png.py +206 -0
  40. md2conf/processor.py +55 -13
  41. md2conf/publisher.py +127 -39
  42. md2conf/scanner.py +38 -129
  43. md2conf/serializer.py +2 -2
  44. md2conf/svg.py +144 -103
  45. md2conf/text.py +1 -1
  46. md2conf/toc.py +73 -1
  47. md2conf/uri.py +1 -1
  48. md2conf/xml.py +1 -1
  49. markdown_to_confluence-0.5.2.dist-info/RECORD +0 -36
  50. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/WHEEL +0 -0
  51. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/entry_points.txt +0 -0
  52. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/top_level.txt +0 -0
  53. {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/zip-safe +0 -0
  54. /md2conf/{puppeteer-config.json → mermaid/puppeteer-config.json} +0 -0
md2conf/png.py ADDED
@@ -0,0 +1,206 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2026, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from io import BytesIO
10
+ from pathlib import Path
11
+ from struct import unpack
12
+ from typing import BinaryIO, Iterable, overload
13
+
14
+
15
class ImageFormatError(RuntimeError):
    """Raised when binary data does not follow the expected image file format."""
17
+
18
+
19
class _Chunk:
    """
    Data chunk in binary data as per the PNG image format.

    Holds the four parts of a chunk exactly as they appear in the stream.
    """

    __slots__ = ("length", "name", "data", "crc")

    # payload size declared in the chunk's length field
    length: int
    # 4-byte chunk type such as b"IHDR"
    name: bytes
    # chunk payload
    data: bytes
    # 4 trailing bytes stored after the payload
    crc: bytes

    def __init__(self, length: int, name: bytes, data: bytes, crc: bytes):
        self.length, self.name = length, name
        self.data, self.crc = data, crc
34
+
35
+
36
def _read_signature(f: BinaryIO) -> None:
    """
    Consumes the first 8 bytes of a stream and verifies that they form the PNG signature.

    :param f: A binary file opened for reading, positioned at the start of the PNG data.
    :raises ImageFormatError: If the bytes read differ from the PNG signature.
    """

    if f.read(8) == b"\x89PNG\r\n\x1a\n":
        return

    raise ImageFormatError("not a valid PNG file")
42
+
43
+
44
def _read_chunk(f: BinaryIO) -> _Chunk | None:
    """
    Reads and parses the next PNG chunk such as `IHDR` or `tEXt`.

    A chunk is read as a 4-byte big-endian payload length, followed by a 4-byte chunk type, the payload and 4 trailing
    CRC bytes. The CRC bytes are carried along verbatim; they are not verified here.

    :param f: A binary file opened for reading, positioned at a chunk boundary.
    :returns: The parsed chunk, or `None` if the stream has no more data.
    :raises ImageFormatError: If the stream ends in the middle of a chunk.
    """

    length_bytes = f.read(4)
    if not length_bytes:
        # clean end of stream at a chunk boundary
        return None

    if len(length_bytes) != 4:
        raise ImageFormatError("expected: 4 bytes storing chunk length")

    length = int.from_bytes(length_bytes, "big")

    # fetch chunk type (4 bytes), payload and CRC (4 bytes) with a single read
    expected_length = 4 + length + 4
    data_bytes = f.read(expected_length)
    actual_length = len(data_bytes)
    if actual_length != expected_length:
        # report the same two quantities that were compared (the payload length alone would contradict the actual count)
        raise ImageFormatError(f"expected: {expected_length} bytes storing chunk type, data and CRC; got: {actual_length}")

    return _Chunk(length, data_bytes[:4], data_bytes[4:-4], data_bytes[-4:])
67
+
68
+
69
def _extract_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
    """
    Determines the pixel dimensions of a PNG image from its header chunk.

    :param source_file: A binary file opened for reading that contains PNG image data.
    :returns: A tuple of the image's width and height in pixels.
    :raises ImageFormatError: If the signature is wrong, or the first chunk is missing, has an unexpected size or is not `IHDR`.
    """

    _read_signature(source_file)

    # the first chunk has to be IHDR (Image Header) with a 13-byte payload
    header = _read_chunk(source_file)
    if header is None:
        raise ImageFormatError("missing IHDR chunk")
    if header.length != 13:
        raise ImageFormatError("invalid chunk length")
    if header.name != b"IHDR":
        raise ImageFormatError(f"expected: IHDR chunk; got: {header.name!r}")

    # payload: width and height followed by bit depth, color type, compression, filter and interlace (not needed here)
    width, height, *_ = unpack(">IIBBBBB", header.data)  # spellchecker:disable-line
    return width, height
99
+
100
+
101
+ @overload
102
+ def extract_png_dimensions(*, data: bytes) -> tuple[int, int]: ...
103
+
104
+
105
+ @overload
106
+ def extract_png_dimensions(*, path: str | Path) -> tuple[int, int]: ...
107
+
108
+
109
+ def extract_png_dimensions(*, data: bytes | None = None, path: str | Path | None = None) -> tuple[int, int]:
110
+ """
111
+ Returns the width and height of a PNG image inspecting its header.
112
+
113
+ :param data: PNG image data.
114
+ :param path: Path to the PNG image file.
115
+ :returns: A tuple of the image's width and height in pixels.
116
+ """
117
+
118
+ if data is not None and path is not None:
119
+ raise TypeError("expected: either `data` or `path`; got: both")
120
+ elif data is not None:
121
+ with BytesIO(data) as f:
122
+ return _extract_png_dimensions(f)
123
+ elif path is not None:
124
+ with open(path, "rb") as f:
125
+ return _extract_png_dimensions(f)
126
+ else:
127
+ raise TypeError("expected: either `data` or `path`; got: neither")
128
+
129
+
130
def _write_chunk(f: BinaryIO, chunk: _Chunk) -> None:
    """
    Serializes a PNG chunk to a stream.

    Writes the length as a 4-byte big-endian integer, followed by the chunk type, the payload and the stored CRC bytes.

    :param f: A binary file opened for writing.
    :param chunk: The chunk to write.
    """

    for part in (chunk.length.to_bytes(4, "big"), chunk.name, chunk.data, chunk.crc):
        f.write(part)
135
+
136
+
137
def _remove_png_chunks(names: Iterable[str], source_file: BinaryIO, target_file: BinaryIO) -> None:
    """
    Copies a PNG file chunk by chunk, leaving out chunks with the specified names.

    :param names: Chunk types (e.g. `tEXt`) to omit from the output; must be ASCII strings.
    :param source_file: A binary file opened for reading that contains PNG image data.
    :param target_file: A binary file opened for writing to receive PNG image data.
    """

    # chunk types are stored as bytes in the file
    excluded = {name.encode("ascii") for name in names}

    _read_signature(source_file)
    target_file.write(b"\x89PNG\r\n\x1a\n")

    # copy chunks until the source is exhausted
    while (chunk := _read_chunk(source_file)) is not None:
        if chunk.name in excluded:
            continue
        _write_chunk(target_file, chunk)
157
+
158
+
159
+ @overload
160
+ def remove_png_chunks(names: Iterable[str], *, source_data: bytes) -> bytes: ...
161
+
162
+
163
+ @overload
164
+ def remove_png_chunks(names: Iterable[str], *, source_path: str | Path) -> bytes: ...
165
+
166
+
167
+ @overload
168
+ def remove_png_chunks(names: Iterable[str], *, source_data: bytes, target_path: str | Path) -> None: ...
169
+
170
+
171
+ @overload
172
+ def remove_png_chunks(names: Iterable[str], *, source_path: str | Path, target_path: str | Path) -> None: ...
173
+
174
+
175
+ def remove_png_chunks(
176
+ names: Iterable[str], *, source_data: bytes | None = None, source_path: str | Path | None = None, target_path: str | Path | None = None
177
+ ) -> bytes | None:
178
+ """
179
+ Rewrites a PNG file by removing chunks with the specified names.
180
+
181
+ :param source_data: PNG image data.
182
+ :param source_path: Path to the file to read from.
183
+ :param target_path: Path to the file to write to.
184
+ """
185
+
186
+ if source_data is not None and source_path is not None:
187
+ raise TypeError("expected: either `source_data` or `source_path`; got: both")
188
+ elif source_data is not None:
189
+
190
+ def source_reader() -> BinaryIO:
191
+ return BytesIO(source_data)
192
+ elif source_path is not None:
193
+
194
+ def source_reader() -> BinaryIO:
195
+ return open(source_path, "rb")
196
+ else:
197
+ raise TypeError("expected: either `source_data` or `source_path`; got: neither")
198
+
199
+ if target_path is None:
200
+ with source_reader() as source_file, BytesIO() as memory_file:
201
+ _remove_png_chunks(names, source_file, memory_file)
202
+ return memory_file.getvalue()
203
+ else:
204
+ with source_reader() as source_file, open(target_path, "wb") as target_file:
205
+ _remove_png_chunks(names, source_file, target_file)
206
+ return None
md2conf/processor.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Publish Markdown files to Confluence wiki.
3
3
 
4
- Copyright 2022-2025, Levente Hunyadi
4
+ Copyright 2022-2026, Levente Hunyadi
5
5
 
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
@@ -15,16 +15,19 @@ from typing import Iterable
15
15
 
16
16
  from .collection import ConfluencePageCollection
17
17
  from .converter import ConfluenceDocument
18
- from .domain import ConfluenceDocumentOptions, ConfluencePageID
19
- from .environment import ArgumentError
18
+ from .environment import ArgumentError, PageError
20
19
  from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
21
20
  from .metadata import ConfluenceSiteMetadata
21
+ from .options import ConfluencePageID, DocumentOptions
22
22
  from .scanner import Scanner
23
+ from .toc import unique_title
23
24
 
24
25
  LOGGER = logging.getLogger(__name__)
25
26
 
26
27
 
27
28
  class DocumentNode:
29
+ "Represents a Markdown document in a hierarchy."
30
+
28
31
  absolute_path: Path
29
32
  page_id: str | None
30
33
  space_key: str | None
@@ -49,24 +52,42 @@ class DocumentNode:
49
52
  self._children = []
50
53
 
51
54
def count(self) -> int:
    "Number of descendants in the sub-tree spanned by this node (the node itself is not counted)."

    # direct children plus everything below each of them
    return len(self._children) + sum(child.count() for child in self._children)
56
61
 
57
62
def add_child(self, child: "DocumentNode") -> None:
    """
    Appends a node to the end of this node's list of direct children.

    :param child: The node to attach.
    """

    self._children.append(child)
59
66
 
60
67
def children(self) -> Iterable["DocumentNode"]:
    "Lazily iterates over the direct children of this node in insertion order."

    yield from self._children
63
72
 
64
73
def descendants(self) -> Iterable["DocumentNode"]:
    """
    Iterates over all nodes below this node (the node itself is not included).

    Each child is produced before its own descendants, i.e. traversal is depth-first.
    """

    for node in self._children:
        yield node
        yield from node.descendants()
68
83
 
69
84
  def all(self) -> Iterable["DocumentNode"]:
85
+ """
86
+ Descendants of this node, part of the sub-tree including the top-level node.
87
+
88
+ Traversal follows depth-first search.
89
+ """
90
+
70
91
  yield self
71
92
  for child in self._children:
72
93
  yield from child.all()
@@ -77,7 +98,7 @@ class Processor:
77
98
  Processes a single Markdown page or a directory of Markdown pages.
78
99
  """
79
100
 
80
- options: ConfluenceDocumentOptions
101
+ options: DocumentOptions
81
102
  site: ConfluenceSiteMetadata
82
103
  root_dir: Path
83
104
 
@@ -85,7 +106,7 @@ class Processor:
85
106
 
86
107
  def __init__(
87
108
  self,
88
- options: ConfluenceDocumentOptions,
109
+ options: DocumentOptions,
89
110
  site: ConfluenceSiteMetadata,
90
111
  root_dir: Path,
91
112
  ) -> None:
@@ -123,6 +144,22 @@ class Processor:
123
144
  Processes a sub-tree rooted at an ancestor node.
124
145
  """
125
146
 
147
+ # verify if pages have a unique title to avoid overwrites within synchronized set
148
+ title_to_path: dict[str, Path] = {}
149
+ duplicates: set[Path] = set()
150
+ for node in root.all():
151
+ if node.title is not None:
152
+ path = title_to_path.get(node.title)
153
+ if path is not None:
154
+ duplicates.add(path)
155
+ duplicates.add(node.absolute_path)
156
+ else:
157
+ title_to_path[node.title] = node.absolute_path
158
+ if duplicates:
159
+ raise PageError(
160
+ f"expected: each synchronized page to have a unique title but duplicates found in files: {', '.join(str(p) for p in sorted(list(duplicates)))}"
161
+ )
162
+
126
163
  # synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
127
164
  self._synchronize_tree(root, self.options.root_page_id)
128
165
 
@@ -140,7 +177,7 @@ class Processor:
140
177
  self._update_page(page_id, document, path)
141
178
 
142
179
  @abstractmethod
143
- def _synchronize_tree(self, root: DocumentNode, root_id: ConfluencePageID | None) -> None:
180
+ def _synchronize_tree(self, tree: DocumentNode, root_id: ConfluencePageID | None) -> None:
144
181
  """
145
182
  Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
146
183
 
@@ -226,14 +263,19 @@ class Processor:
226
263
  LOGGER.info("Indexing file: %s", path)
227
264
 
228
265
  # extract information from a Markdown document found in a local directory.
229
- document = Scanner().read(path)
266
+ with open(path, "r", encoding="utf-8") as f:
267
+ text = f.read()
268
+
269
+ document = Scanner().parse(text)
270
+ props = document.properties
271
+ title = props.title or unique_title(text)
230
272
 
231
273
  return DocumentNode(
232
274
  absolute_path=path,
233
- page_id=document.page_id,
234
- space_key=document.space_key,
235
- title=document.title,
236
- synchronized=document.synchronized if document.synchronized is not None else True,
275
+ page_id=props.page_id,
276
+ space_key=props.space_key,
277
+ title=title,
278
+ synchronized=props.synchronized if props.synchronized is not None else True,
237
279
  )
238
280
 
239
281
  def _generate_hash(self, absolute_path: Path) -> str:
@@ -247,10 +289,10 @@ class Processor:
247
289
 
248
290
 
249
291
  class ProcessorFactory:
250
- options: ConfluenceDocumentOptions
292
+ options: DocumentOptions
251
293
  site: ConfluenceSiteMetadata
252
294
 
253
- def __init__(self, options: ConfluenceDocumentOptions, site: ConfluenceSiteMetadata) -> None:
295
+ def __init__(self, options: DocumentOptions, site: ConfluenceSiteMetadata) -> None:
254
296
  self.options = options
255
297
  self.site = site
256
298
 
md2conf/publisher.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Publish Markdown files to Confluence wiki.
3
3
 
4
- Copyright 2022-2025, Levente Hunyadi
4
+ Copyright 2022-2026, Levente Hunyadi
5
5
 
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
@@ -10,18 +10,78 @@ import logging
10
10
  from pathlib import Path
11
11
 
12
12
  from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
13
- from .converter import ConfluenceDocument, attachment_name, get_volatile_attributes, get_volatile_elements
13
+ from .attachment import attachment_name
14
+ from .compatibility import override, path_relative_to
15
+ from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
14
16
  from .csf import AC_ATTR, elements_from_string
15
- from .domain import ConfluenceDocumentOptions, ConfluencePageID
16
17
  from .environment import PageError
17
- from .extra import override, path_relative_to
18
18
  from .metadata import ConfluencePageMetadata
19
+ from .options import ConfluencePageID, DocumentOptions
19
20
  from .processor import Converter, DocumentNode, Processor, ProcessorFactory
20
21
  from .xml import is_xml_equal, unwrap_substitute
21
22
 
22
23
  LOGGER = logging.getLogger(__name__)
23
24
 
24
25
 
26
class _MissingType:
    """Type of the sentinel object that stands for a lookup that found no entry."""


# sentinel used as a dictionary lookup default, such that a stored value of `None` can be told apart from an absent entry
_MissingDefault = _MissingType()
31
+
32
+
33
class ParentCatalog:
    """
    Maintains a catalog of child-parent relationships.

    Parent lookups are cached, so each page's parent is fetched at most once.
    """

    # session used to fetch page properties for pages whose parent is not cached yet
    _api: ConfluenceSession
    # cache of page ID to parent page ID; a value of `None` records a page without a parent
    _child_to_parent: dict[str, str | None]
    # page IDs at which a trace terminates successfully
    _known: set[str]

    def __init__(self, api: ConfluenceSession) -> None:
        self._api = api
        self._child_to_parent = {}
        self._known = set()

    def add_known(self, page_id: str) -> None:
        """
        Registers a well-known page such as the root page or a page paired with a Markdown file using an explicit page ID.

        :param page_id: The page to treat as a valid end point of a trace.
        """

        self._known.add(page_id)

    def add_parent(self, *, page_id: str, parent_id: str | None) -> None:
        """
        Records a child-parent relationship.

        Use this to persist information already acquired by an earlier API call.

        :param page_id: The child page.
        :param parent_id: The parent of the child page, or `None` if the page has no parent.
        """

        self._child_to_parent[page_id] = parent_id

    def is_traceable(self, page_id: str) -> bool:
        """
        Verifies if a page traces back to a well-known page by following parent links.

        :param page_id: The page to check.
        :returns: True if the page or one of its ancestors is a well-known page.
        """

        current: str | None = page_id
        while current is not None:
            if current in self._known:
                return True

            parent_id = self._child_to_parent.get(current, _MissingDefault)
            if isinstance(parent_id, _MissingType):
                # parent not cached yet: query the API and remember the answer
                parent_id = self._api.get_page_properties(current).parentId
                self._child_to_parent[current] = parent_id

            current = parent_id

        # ran out of ancestors without meeting a well-known page
        return False
83
+
84
+
25
85
  class SynchronizingProcessor(Processor):
26
86
  """
27
87
  Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -29,7 +89,7 @@ class SynchronizingProcessor(Processor):
29
89
 
30
90
  api: ConfluenceSession
31
91
 
32
- def __init__(self, api: ConfluenceSession, options: ConfluenceDocumentOptions, root_dir: Path) -> None:
92
+ def __init__(self, api: ConfluenceSession, options: DocumentOptions, root_dir: Path) -> None:
33
93
  """
34
94
  Initializes a new processor instance.
35
95
 
@@ -42,7 +102,7 @@ class SynchronizingProcessor(Processor):
42
102
  self.api = api
43
103
 
44
104
  @override
45
- def _synchronize_tree(self, root: DocumentNode, root_id: ConfluencePageID | None) -> None:
105
+ def _synchronize_tree(self, tree: DocumentNode, root_id: ConfluencePageID | None) -> None:
46
106
  """
47
107
  Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
48
108
 
@@ -51,26 +111,25 @@ class SynchronizingProcessor(Processor):
51
111
  Updates the original Markdown document to add tags to associate the document with its corresponding Confluence page.
52
112
  """
53
113
 
54
- if root.page_id is None and root_id is None:
55
- raise PageError(f"expected: root page ID in options, or explicit page ID in {root.absolute_path}")
56
- elif root.page_id is not None and root_id is not None:
57
- if root.page_id != root_id.page_id:
58
- raise PageError(f"mismatched inferred page ID of {root_id.page_id} and explicit page ID in {root.absolute_path}")
59
-
60
- real_id = root_id
114
+ if tree.page_id is None and root_id is None:
115
+ raise PageError(f"expected: root page ID in options, or explicit page ID in {tree.absolute_path}")
116
+ elif tree.page_id is not None:
117
+ real_id = ConfluencePageID(tree.page_id) # explicit page ID takes precedence
61
118
  elif root_id is not None:
62
119
  real_id = root_id
63
- elif root.page_id is not None:
64
- real_id = ConfluencePageID(root.page_id)
65
120
  else:
66
- raise NotImplementedError("condition not exhaustive")
121
+ raise NotImplementedError("condition not exhaustive for synchronizing tree")
67
122
 
68
- self._synchronize_subtree(root, real_id)
123
+ catalog = ParentCatalog(self.api)
124
+ catalog.add_known(real_id.page_id)
125
+ self._synchronize_subtree(tree, real_id, catalog)
69
126
 
70
- def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID) -> None:
127
+ def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID, catalog: ParentCatalog) -> None:
71
128
  if node.page_id is not None:
72
129
  # verify if page exists
73
130
  page = self.api.get_page_properties(node.page_id)
131
+ catalog.add_known(page.id)
132
+ catalog.add_parent(page_id=page.id, parent_id=page.parentId)
74
133
  update = False
75
134
  else:
76
135
  if node.title is not None:
@@ -81,20 +140,26 @@ class SynchronizingProcessor(Processor):
81
140
  digest = self._generate_hash(node.absolute_path)
82
141
  title = f"{node.absolute_path.stem} [{digest}]"
83
142
 
84
- if self.options.title_prefix is not None:
85
- title = f"{self.options.title_prefix} {title}"
143
+ title = self._get_extended_title(title)
86
144
 
87
145
  # look up page by (possibly auto-generated) title
88
146
  page = self.api.get_or_create_page(title, parent_id.page_id)
147
+ catalog.add_parent(page_id=page.id, parent_id=page.parentId)
89
148
 
90
149
  if page.status is ConfluenceStatus.ARCHIVED:
91
- # user has archived a page with this (auto-generated) title
92
- raise PageError(f"unable to update archived page with ID {page.id}")
150
+ # user has archived a page with this (possibly auto-generated) title
151
+ raise PageError(f"unable to update archived page with ID {page.id} when synchronizing {node.absolute_path}")
152
+
153
+ if not catalog.is_traceable(page.id):
154
+ raise PageError(
155
+ f"expected: page with ID {page.id} to be a descendant of the root page or one of the pages paired with a Markdown file using an explicit "
156
+ f"page ID when synchronizing {node.absolute_path}"
157
+ )
93
158
 
94
159
  update = True
95
160
 
96
161
  space_key = self.api.space_id_to_key(page.spaceId)
97
- if update:
162
+ if update and not self.options.skip_update:
98
163
  self._update_markdown(
99
164
  node.absolute_path,
100
165
  page_id=page.id,
@@ -110,7 +175,7 @@ class SynchronizingProcessor(Processor):
110
175
  self.page_metadata.add(node.absolute_path, data)
111
176
 
112
177
  for child_node in node.children():
113
- self._synchronize_subtree(child_node, ConfluencePageID(page.id))
178
+ self._synchronize_subtree(child_node, ConfluencePageID(page.id), catalog)
114
179
 
115
180
  @override
116
181
  def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
@@ -140,19 +205,7 @@ class SynchronizingProcessor(Processor):
140
205
  content = document.xhtml()
141
206
  LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
142
207
 
143
- title = None
144
- if document.title is not None:
145
- meta = self.page_metadata.get(path)
146
- if meta is not None and meta.title != document.title:
147
- conflicting_page_id = self.api.page_exists(document.title, space_id=self.api.space_key_to_id(meta.space_key))
148
- if conflicting_page_id is None:
149
- title = document.title
150
- else:
151
- LOGGER.info(
152
- "Document title of %s conflicts with Confluence page title of %s",
153
- path,
154
- conflicting_page_id,
155
- )
208
+ title = self._get_unique_title(document, path)
156
209
 
157
210
  # fetch existing page
158
211
  page = self.api.get_page(page_id.page_id)
@@ -183,6 +236,41 @@ class SynchronizingProcessor(Processor):
183
236
  if document.properties is not None:
184
237
  self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
185
238
 
239
def _get_extended_title(self, title: str) -> str:
    """
    Prepends the configured title prefix (if any) to a title, separated by a space.

    :param title: The title to extend.
    :returns: The prefixed title, or the title unchanged when no prefix is configured.
    """

    prefix = self.options.title_prefix
    if prefix is None:
        return title

    return f"{prefix} {title}"
248
+
249
def _get_unique_title(self, document: ConfluenceDocument, path: Path) -> str | None:
    """
    Determines the (new) document title to assign to the Confluence page.

    A changed title is only returned if no other page in the space already uses it.

    :param document: The converted document whose title is to be used.
    :param path: Path to the Markdown file the document was produced from.
    :returns: The title to assign (with the title prefix applied), or `None` if the document has no title or the title is taken.
    """

    # document has no title (neither in front-matter nor as unique top-level heading)
    if document.title is None:
        return None

    # add configured title prefix
    candidate = self._get_extended_title(document.title)

    # title is the same as discovered during directory traversal (or nothing was recorded): no availability check needed
    meta = self.page_metadata.get(path)
    if meta is None or meta.title == candidate:
        return candidate

    # title has changed, check if new title is available
    space_id = self.api.space_key_to_id(meta.space_key)
    conflicting_id = self.api.page_exists(candidate, space_id=space_id)
    if conflicting_id is None:
        return candidate

    LOGGER.info("Unrelated Confluence page with ID %s has the same inferred title as the Markdown file: %s", conflicting_id, path)
    return None
273
+
186
274
  def _update_markdown(self, path: Path, *, page_id: str, space_key: str) -> None:
187
275
  """
188
276
  Writes the Confluence page ID and space key at the beginning of the Markdown file.
@@ -212,7 +300,7 @@ class SynchronizingProcessor(Processor):
212
300
  class SynchronizingProcessorFactory(ProcessorFactory):
213
301
  api: ConfluenceSession
214
302
 
215
- def __init__(self, api: ConfluenceSession, options: ConfluenceDocumentOptions) -> None:
303
+ def __init__(self, api: ConfluenceSession, options: DocumentOptions) -> None:
216
304
  super().__init__(options, api.site)
217
305
  self.api = api
218
306
 
@@ -227,5 +315,5 @@ class Publisher(Converter):
227
315
  This is the class instantiated by the command-line application.
228
316
  """
229
317
 
230
- def __init__(self, api: ConfluenceSession, options: ConfluenceDocumentOptions) -> None:
318
+ def __init__(self, api: ConfluenceSession, options: DocumentOptions) -> None:
231
319
  super().__init__(SynchronizingProcessorFactory(api, options))