markdown-to-confluence 0.5.2-py3-none-any.whl → 0.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/METADATA +258 -157
- markdown_to_confluence-0.5.4.dist-info/RECORD +55 -0
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/licenses/LICENSE +1 -1
- md2conf/__init__.py +2 -2
- md2conf/__main__.py +83 -44
- md2conf/api.py +30 -10
- md2conf/attachment.py +72 -0
- md2conf/coalesce.py +43 -0
- md2conf/collection.py +1 -1
- md2conf/{extra.py → compatibility.py} +1 -1
- md2conf/converter.py +240 -657
- md2conf/csf.py +13 -11
- md2conf/drawio/__init__.py +0 -0
- md2conf/drawio/extension.py +116 -0
- md2conf/{drawio.py → drawio/render.py} +1 -1
- md2conf/emoticon.py +3 -3
- md2conf/environment.py +2 -2
- md2conf/extension.py +82 -0
- md2conf/external.py +66 -0
- md2conf/formatting.py +135 -0
- md2conf/frontmatter.py +70 -0
- md2conf/image.py +128 -0
- md2conf/latex.py +4 -183
- md2conf/local.py +8 -8
- md2conf/markdown.py +1 -1
- md2conf/matcher.py +1 -1
- md2conf/mermaid/__init__.py +0 -0
- md2conf/mermaid/config.py +20 -0
- md2conf/mermaid/extension.py +109 -0
- md2conf/{mermaid.py → mermaid/render.py} +10 -38
- md2conf/mermaid/scanner.py +55 -0
- md2conf/metadata.py +1 -1
- md2conf/{domain.py → options.py} +75 -16
- md2conf/plantuml/__init__.py +0 -0
- md2conf/plantuml/config.py +20 -0
- md2conf/plantuml/extension.py +158 -0
- md2conf/plantuml/render.py +138 -0
- md2conf/plantuml/scanner.py +56 -0
- md2conf/png.py +206 -0
- md2conf/processor.py +55 -13
- md2conf/publisher.py +127 -39
- md2conf/scanner.py +38 -129
- md2conf/serializer.py +2 -2
- md2conf/svg.py +144 -103
- md2conf/text.py +1 -1
- md2conf/toc.py +73 -1
- md2conf/uri.py +1 -1
- md2conf/xml.py +1 -1
- markdown_to_confluence-0.5.2.dist-info/RECORD +0 -36
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/WHEEL +0 -0
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.5.2.dist-info → markdown_to_confluence-0.5.4.dist-info}/zip-safe +0 -0
- /md2conf/{puppeteer-config.json → mermaid/puppeteer-config.json} +0 -0
md2conf/png.py
ADDED
@@ -0,0 +1,206 @@
+"""
+Publish Markdown files to Confluence wiki.
+
+Copyright 2022-2026, Levente Hunyadi
+
+:see: https://github.com/hunyadi/md2conf
+"""
+
+from io import BytesIO
+from pathlib import Path
+from struct import unpack
+from typing import BinaryIO, Iterable, overload
+
+
+class ImageFormatError(RuntimeError):
+    pass
+
+
+class _Chunk:
+    "Data chunk in binary data as per the PNG image format."
+
+    __slots__ = ("length", "name", "data", "crc")
+
+    length: int
+    name: bytes
+    data: bytes
+    crc: bytes
+
+    def __init__(self, length: int, name: bytes, data: bytes, crc: bytes):
+        self.length = length
+        self.name = name
+        self.data = data
+        self.crc = crc
+
+
+def _read_signature(f: BinaryIO) -> None:
+    "Reads and checks PNG signature (first 8 bytes)."
+
+    signature = f.read(8)
+    if signature != b"\x89PNG\r\n\x1a\n":
+        raise ImageFormatError("not a valid PNG file")
+
+
+def _read_chunk(f: BinaryIO) -> _Chunk | None:
+    "Reads and parses a PNG chunk such as `IHDR` or `tEXt`."
+
+    length_bytes = f.read(4)
+    if not length_bytes:
+        return None
+
+    if len(length_bytes) != 4:
+        raise ImageFormatError("expected: 4 bytes storing chunk length")
+
+    length = int.from_bytes(length_bytes, "big")
+
+    data_length = 4 + length + 4
+    data_bytes = f.read(data_length)
+    actual_length = len(data_bytes)
+    if actual_length != data_length:
+        raise ImageFormatError(f"expected: {length} bytes storing chunk data; got: {actual_length}")
+
+    chunk_type = data_bytes[0:4]
+    chunk_data = data_bytes[4:-4]
+    crc = data_bytes[-4:]
+
+    return _Chunk(length, chunk_type, chunk_data, crc)
+
+
+def _extract_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
+    """
+    Returns the width and height of a PNG image inspecting its header.
+
+    :param source_file: A binary file opened for reading that contains PNG image data.
+    :returns: A tuple of the image's width and height in pixels.
+    """
+
+    _read_signature(source_file)
+
+    # validate IHDR (Image Header) chunk
+    ihdr = _read_chunk(source_file)
+    if ihdr is None:
+        raise ImageFormatError("missing IHDR chunk")
+
+    if ihdr.length != 13:
+        raise ImageFormatError("invalid chunk length")
+    if ihdr.name != b"IHDR":
+        raise ImageFormatError(f"expected: IHDR chunk; got: {ihdr.name!r}")
+
+    (
+        width,
+        height,
+        bit_depth,  # pyright: ignore[reportUnusedVariable]
+        color_type,  # pyright: ignore[reportUnusedVariable]
+        compression,  # pyright: ignore[reportUnusedVariable]
+        filter,  # pyright: ignore[reportUnusedVariable]
+        interlace,  # pyright: ignore[reportUnusedVariable]
+    ) = unpack(">IIBBBBB", ihdr.data)  # spellchecker:disable-line
+    return width, height
+
+
+@overload
+def extract_png_dimensions(*, data: bytes) -> tuple[int, int]: ...
+
+
+@overload
+def extract_png_dimensions(*, path: str | Path) -> tuple[int, int]: ...
+
+
+def extract_png_dimensions(*, data: bytes | None = None, path: str | Path | None = None) -> tuple[int, int]:
+    """
+    Returns the width and height of a PNG image inspecting its header.
+
+    :param data: PNG image data.
+    :param path: Path to the PNG image file.
+    :returns: A tuple of the image's width and height in pixels.
+    """
+
+    if data is not None and path is not None:
+        raise TypeError("expected: either `data` or `path`; got: both")
+    elif data is not None:
+        with BytesIO(data) as f:
+            return _extract_png_dimensions(f)
+    elif path is not None:
+        with open(path, "rb") as f:
+            return _extract_png_dimensions(f)
+    else:
+        raise TypeError("expected: either `data` or `path`; got: neither")
+
+
+def _write_chunk(f: BinaryIO, chunk: _Chunk) -> None:
+    f.write(chunk.length.to_bytes(4, "big"))
+    f.write(chunk.name)
+    f.write(chunk.data)
+    f.write(chunk.crc)
+
+
+def _remove_png_chunks(names: Iterable[str], source_file: BinaryIO, target_file: BinaryIO) -> None:
+    """
+    Rewrites a PNG file by removing chunks with the specified names.
+
+    :param source_file: A binary file opened for reading that contains PNG image data.
+    :param target_file: A binary file opened for writing to receive PNG image data.
+    """
+
+    exclude_set = set(name.encode("ascii") for name in names)
+
+    _read_signature(source_file)
+    target_file.write(b"\x89PNG\r\n\x1a\n")
+
+    while True:
+        chunk = _read_chunk(source_file)
+        if chunk is None:
+            break
+
+        if chunk.name not in exclude_set:
+            _write_chunk(target_file, chunk)
+
+
+@overload
+def remove_png_chunks(names: Iterable[str], *, source_data: bytes) -> bytes: ...
+
+
+@overload
+def remove_png_chunks(names: Iterable[str], *, source_path: str | Path) -> bytes: ...
+
+
+@overload
+def remove_png_chunks(names: Iterable[str], *, source_data: bytes, target_path: str | Path) -> None: ...
+
+
+@overload
+def remove_png_chunks(names: Iterable[str], *, source_path: str | Path, target_path: str | Path) -> None: ...
+
+
+def remove_png_chunks(
+    names: Iterable[str], *, source_data: bytes | None = None, source_path: str | Path | None = None, target_path: str | Path | None = None
+) -> bytes | None:
+    """
+    Rewrites a PNG file by removing chunks with the specified names.
+
+    :param source_data: PNG image data.
+    :param source_path: Path to the file to read from.
+    :param target_path: Path to the file to write to.
+    """
+
+    if source_data is not None and source_path is not None:
+        raise TypeError("expected: either `source_data` or `source_path`; got: both")
+    elif source_data is not None:
+
+        def source_reader() -> BinaryIO:
+            return BytesIO(source_data)
+    elif source_path is not None:

+        def source_reader() -> BinaryIO:
+            return open(source_path, "rb")
+    else:
+        raise TypeError("expected: either `source_data` or `source_path`; got: neither")
+
+    if target_path is None:
+        with source_reader() as source_file, BytesIO() as memory_file:
+            _remove_png_chunks(names, source_file, memory_file)
+            return memory_file.getvalue()
+    else:
+        with source_reader() as source_file, open(target_path, "wb") as target_file:
+            _remove_png_chunks(names, source_file, target_file)
+            return None
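
The module exposes two public, keyword-only entry points, `extract_png_dimensions` and `remove_png_chunks`, each overloaded to accept either in-memory bytes or a file path. A minimal usage sketch based on the signatures above; the file names are placeholders:

```python
from md2conf.png import extract_png_dimensions, remove_png_chunks

# read width and height from the IHDR chunk without decoding pixel data
width, height = extract_png_dimensions(path="diagram.png")
print(f"{width}x{height}")

# drop textual metadata chunks; without target_path, the rewritten PNG is returned as bytes
stripped = remove_png_chunks(["tEXt", "zTXt", "iTXt"], source_path="diagram.png")

# with target_path, the result is written to disk and None is returned
remove_png_chunks(["tEXt"], source_data=stripped, target_path="diagram-clean.png")
```

Surviving chunks are copied verbatim, CRC included, so the output remains a valid PNG without recomputing checksums.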
md2conf/processor.py
CHANGED
@@ -1,7 +1,7 @@
 """
 Publish Markdown files to Confluence wiki.
 
-Copyright 2022-
+Copyright 2022-2026, Levente Hunyadi
 
 :see: https://github.com/hunyadi/md2conf
 """
@@ -15,16 +15,19 @@ from typing import Iterable
 
 from .collection import ConfluencePageCollection
 from .converter import ConfluenceDocument
-from .
-from .environment import ArgumentError
+from .environment import ArgumentError, PageError
 from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
 from .metadata import ConfluenceSiteMetadata
+from .options import ConfluencePageID, DocumentOptions
 from .scanner import Scanner
+from .toc import unique_title
 
 LOGGER = logging.getLogger(__name__)
 
 
 class DocumentNode:
+    "Represents a Markdown document in a hierarchy."
+
     absolute_path: Path
     page_id: str | None
     space_key: str | None
@@ -49,24 +52,42 @@ class DocumentNode:
         self._children = []
 
     def count(self) -> int:
+        "Number of descendants in the sub-tree spanned by this node (excluding the top-level node)."
+
         c = len(self._children)
         for child in self._children:
             c += child.count()
         return c
 
     def add_child(self, child: "DocumentNode") -> None:
+        "Adds a new node to the list of direct children."
+
         self._children.append(child)
 
     def children(self) -> Iterable["DocumentNode"]:
+        "Direct children of this node."
+
         for child in self._children:
             yield child
 
     def descendants(self) -> Iterable["DocumentNode"]:
+        """
+        Descendants of this node, part of its sub-tree.
+
+        Traversal follows depth-first search.
+        """
+
         for child in self._children:
             yield child
             yield from child.descendants()
 
     def all(self) -> Iterable["DocumentNode"]:
+        """
+        Descendants of this node, part of the sub-tree including the top-level node.
+
+        Traversal follows depth-first search.
+        """
+
         yield self
         for child in self._children:
             yield from child.all()
@@ -77,7 +98,7 @@ class Processor:
     Processes a single Markdown page or a directory of Markdown pages.
     """
 
-    options:
+    options: DocumentOptions
     site: ConfluenceSiteMetadata
     root_dir: Path
 
@@ -85,7 +106,7 @@ class Processor:
 
     def __init__(
         self,
-        options:
+        options: DocumentOptions,
         site: ConfluenceSiteMetadata,
         root_dir: Path,
    ) -> None:
@@ -123,6 +144,22 @@ class Processor:
         Processes a sub-tree rooted at an ancestor node.
         """
 
+        # verify if pages have a unique title to avoid overwrites within synchronized set
+        title_to_path: dict[str, Path] = {}
+        duplicates: set[Path] = set()
+        for node in root.all():
+            if node.title is not None:
+                path = title_to_path.get(node.title)
+                if path is not None:
+                    duplicates.add(path)
+                    duplicates.add(node.absolute_path)
+                else:
+                    title_to_path[node.title] = node.absolute_path
+        if duplicates:
+            raise PageError(
+                f"expected: each synchronized page to have a unique title but duplicates found in files: {', '.join(str(p) for p in sorted(list(duplicates)))}"
+            )
+
         # synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
         self._synchronize_tree(root, self.options.root_page_id)
 
@@ -140,7 +177,7 @@ class Processor:
             self._update_page(page_id, document, path)
 
     @abstractmethod
-    def _synchronize_tree(self,
+    def _synchronize_tree(self, tree: DocumentNode, root_id: ConfluencePageID | None) -> None:
         """
         Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
 
@@ -226,14 +263,19 @@ class Processor:
         LOGGER.info("Indexing file: %s", path)
 
         # extract information from a Markdown document found in a local directory.
-
+        with open(path, "r", encoding="utf-8") as f:
+            text = f.read()
+
+        document = Scanner().parse(text)
+        props = document.properties
+        title = props.title or unique_title(text)
 
         return DocumentNode(
             absolute_path=path,
-            page_id=
-            space_key=
-            title=
-            synchronized=
+            page_id=props.page_id,
+            space_key=props.space_key,
+            title=title,
+            synchronized=props.synchronized if props.synchronized is not None else True,
         )
 
     def _generate_hash(self, absolute_path: Path) -> str:
@@ -247,10 +289,10 @@ class Processor:
 
 
 class ProcessorFactory:
-    options:
+    options: DocumentOptions
     site: ConfluenceSiteMetadata
 
-    def __init__(self, options:
+    def __init__(self, options: DocumentOptions, site: ConfluenceSiteMetadata) -> None:
         self.options = options
         self.site = site
 
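
The duplicate-title guard added to `Processor` walks the tree via `DocumentNode.all()` (depth-first, root included) and raises `PageError` before any page is touched if two synchronized files resolve to the same title. A self-contained sketch of the same check, using a simplified stand-in for `DocumentNode`:

```python
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator


@dataclass
class Node:
    "Simplified stand-in for md2conf's DocumentNode."

    absolute_path: Path
    title: str | None
    children: list["Node"] = field(default_factory=list)

    def all(self) -> Iterator["Node"]:
        "Depth-first traversal that includes the top-level node."

        yield self
        for child in self.children:
            yield from child.all()


def find_duplicate_titles(root: Node) -> set[Path]:
    "Returns the files whose titles collide, mirroring the guard in the diff above."

    title_to_path: dict[str, Path] = {}
    duplicates: set[Path] = set()
    for node in root.all():
        if node.title is None:
            continue
        seen = title_to_path.get(node.title)
        if seen is not None:
            duplicates.update((seen, node.absolute_path))
        else:
            title_to_path[node.title] = node.absolute_path
    return duplicates


tree = Node(Path("docs/index.md"), "Home", [
    Node(Path("docs/a.md"), "Setup"),
    Node(Path("docs/b.md"), "Setup"),
])
assert find_duplicate_titles(tree) == {Path("docs/a.md"), Path("docs/b.md")}
```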
md2conf/publisher.py
CHANGED
@@ -1,7 +1,7 @@
 """
 Publish Markdown files to Confluence wiki.
 
-Copyright 2022-
+Copyright 2022-2026, Levente Hunyadi
 
 :see: https://github.com/hunyadi/md2conf
 """
@@ -10,18 +10,78 @@ import logging
 from pathlib import Path
 
 from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
-from .
+from .attachment import attachment_name
+from .compatibility import override, path_relative_to
+from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
 from .csf import AC_ATTR, elements_from_string
-from .domain import ConfluenceDocumentOptions, ConfluencePageID
 from .environment import PageError
-from .extra import override, path_relative_to
 from .metadata import ConfluencePageMetadata
+from .options import ConfluencePageID, DocumentOptions
 from .processor import Converter, DocumentNode, Processor, ProcessorFactory
 from .xml import is_xml_equal, unwrap_substitute
 
 LOGGER = logging.getLogger(__name__)
 
 
+class _MissingType:
+    pass
+
+
+_MissingDefault = _MissingType()
+
+
+class ParentCatalog:
+    "Maintains a catalog of child-parent relationships."
+
+    _api: ConfluenceSession
+    _child_to_parent: dict[str, str | None]
+    _known: set[str]
+
+    def __init__(self, api: ConfluenceSession) -> None:
+        self._api = api
+        self._child_to_parent = {}
+        self._known = set()
+
+    def add_known(self, page_id: str) -> None:
+        """
+        Adds a new well-known page such as the root page or a page paired with a Markdown file using an explicit page ID.
+        """
+
+        self._known.add(page_id)
+
+    def add_parent(self, *, page_id: str, parent_id: str | None) -> None:
+        """
+        Adds a new child-parent relationship.
+
+        This method is useful to persist information acquired by a previous API call.
+        """
+
+        self._child_to_parent[page_id] = parent_id
+
+    def is_traceable(self, page_id: str) -> bool:
+        """
+        Verifies if a page traces back to a well-known root page.
+
+        :param page_id: The page to check.
+        """
+
+        if page_id in self._known:
+            return True
+
+        known_parent_id = self._child_to_parent.get(page_id, _MissingDefault)
+        if not isinstance(known_parent_id, _MissingType):
+            parent_id = known_parent_id
+        else:
+            page = self._api.get_page_properties(page_id)
+            parent_id = page.parentId
+            self._child_to_parent[page_id] = parent_id
+
+        if parent_id is None:
+            return False
+
+        return self.is_traceable(parent_id)
+
+
 class SynchronizingProcessor(Processor):
     """
     Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -29,7 +89,7 @@ class SynchronizingProcessor(Processor):
 
     api: ConfluenceSession
 
-    def __init__(self, api: ConfluenceSession, options:
+    def __init__(self, api: ConfluenceSession, options: DocumentOptions, root_dir: Path) -> None:
         """
         Initializes a new processor instance.
 
@@ -42,7 +102,7 @@ class SynchronizingProcessor(Processor):
         self.api = api
 
     @override
-    def _synchronize_tree(self,
+    def _synchronize_tree(self, tree: DocumentNode, root_id: ConfluencePageID | None) -> None:
         """
         Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
 
@@ -51,26 +111,25 @@ class SynchronizingProcessor(Processor):
         Updates the original Markdown document to add tags to associate the document with its corresponding Confluence page.
         """
 
-        if
-            raise PageError(f"expected: root page ID in options, or explicit page ID in {
-        elif
-
-            raise PageError(f"mismatched inferred page ID of {root_id.page_id} and explicit page ID in {root.absolute_path}")
-
-            real_id = root_id
+        if tree.page_id is None and root_id is None:
+            raise PageError(f"expected: root page ID in options, or explicit page ID in {tree.absolute_path}")
+        elif tree.page_id is not None:
+            real_id = ConfluencePageID(tree.page_id)  # explicit page ID takes precedence
         elif root_id is not None:
             real_id = root_id
-        elif root.page_id is not None:
-            real_id = ConfluencePageID(root.page_id)
         else:
-            raise NotImplementedError("condition not exhaustive")
+            raise NotImplementedError("condition not exhaustive for synchronizing tree")
 
-        self.
+        catalog = ParentCatalog(self.api)
+        catalog.add_known(real_id.page_id)
+        self._synchronize_subtree(tree, real_id, catalog)
 
-    def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID) -> None:
+    def _synchronize_subtree(self, node: DocumentNode, parent_id: ConfluencePageID, catalog: ParentCatalog) -> None:
         if node.page_id is not None:
             # verify if page exists
             page = self.api.get_page_properties(node.page_id)
+            catalog.add_known(page.id)
+            catalog.add_parent(page_id=page.id, parent_id=page.parentId)
             update = False
         else:
             if node.title is not None:
@@ -81,20 +140,26 @@ class SynchronizingProcessor(Processor):
             digest = self._generate_hash(node.absolute_path)
             title = f"{node.absolute_path.stem} [{digest}]"
 
-
-                title = f"{self.options.title_prefix} {title}"
+            title = self._get_extended_title(title)
 
             # look up page by (possibly auto-generated) title
             page = self.api.get_or_create_page(title, parent_id.page_id)
+            catalog.add_parent(page_id=page.id, parent_id=page.parentId)
 
             if page.status is ConfluenceStatus.ARCHIVED:
-                # user has archived a page with this (auto-generated) title
-                raise PageError(f"unable to update archived page with ID {page.id}")
+                # user has archived a page with this (possibly auto-generated) title
+                raise PageError(f"unable to update archived page with ID {page.id} when synchronizing {node.absolute_path}")
+
+            if not catalog.is_traceable(page.id):
+                raise PageError(
+                    f"expected: page with ID {page.id} to be a descendant of the root page or one of the pages paired with a Markdown file using an explicit "
+                    f"page ID when synchronizing {node.absolute_path}"
+                )
 
             update = True
 
         space_key = self.api.space_id_to_key(page.spaceId)
-        if update:
+        if update and not self.options.skip_update:
             self._update_markdown(
                 node.absolute_path,
                 page_id=page.id,
@@ -110,7 +175,7 @@ class SynchronizingProcessor(Processor):
         self.page_metadata.add(node.absolute_path, data)
 
         for child_node in node.children():
-            self._synchronize_subtree(child_node, ConfluencePageID(page.id))
+            self._synchronize_subtree(child_node, ConfluencePageID(page.id), catalog)
 
     @override
     def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
@@ -140,19 +205,7 @@ class SynchronizingProcessor(Processor):
         content = document.xhtml()
         LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
 
-        title =
-        if document.title is not None:
-            meta = self.page_metadata.get(path)
-            if meta is not None and meta.title != document.title:
-                conflicting_page_id = self.api.page_exists(document.title, space_id=self.api.space_key_to_id(meta.space_key))
-                if conflicting_page_id is None:
-                    title = document.title
-                else:
-                    LOGGER.info(
-                        "Document title of %s conflicts with Confluence page title of %s",
-                        path,
-                        conflicting_page_id,
-                    )
+        title = self._get_unique_title(document, path)
 
         # fetch existing page
         page = self.api.get_page(page_id.page_id)
@@ -183,6 +236,41 @@ class SynchronizingProcessor(Processor):
         if document.properties is not None:
             self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
 
+    def _get_extended_title(self, title: str) -> str:
+        """
+        Returns a title with the title prefix applied (if any).
+        """
+
+        if self.options.title_prefix is not None:
+            return f"{self.options.title_prefix} {title}"
+        else:
+            return title
+
+    def _get_unique_title(self, document: ConfluenceDocument, path: Path) -> str | None:
+        """
+        Determines the (new) document title to assign to the Confluence page.
+
+        Ensures that the title is unique across the Confluence space.
+        """
+
+        # document has no title (neither in front-matter nor as unique top-level heading)
+        if document.title is None:
+            return None
+
+        # add configured title prefix
+        title = self._get_extended_title(document.title)
+
+        # compare current document title with title discovered during directory traversal
+        meta = self.page_metadata.get(path)
+        if meta is not None and meta.title != title:
+            # title has changed, check if new title is available
+            page_id = self.api.page_exists(title, space_id=self.api.space_key_to_id(meta.space_key))
+            if page_id is not None:
+                LOGGER.info("Unrelated Confluence page with ID %s has the same inferred title as the Markdown file: %s", page_id, path)
+                return None
+
+        return title
+
     def _update_markdown(self, path: Path, *, page_id: str, space_key: str) -> None:
         """
         Writes the Confluence page ID and space key at the beginning of the Markdown file.
@@ -212,7 +300,7 @@ class SynchronizingProcessor(Processor):
 class SynchronizingProcessorFactory(ProcessorFactory):
     api: ConfluenceSession
 
-    def __init__(self, api: ConfluenceSession, options:
+    def __init__(self, api: ConfluenceSession, options: DocumentOptions) -> None:
         super().__init__(options, api.site)
         self.api = api
 
@@ -227,5 +315,5 @@ class Publisher(Converter):
     This is the class instantiated by the command-line application.
     """
 
-    def __init__(self, api: ConfluenceSession, options:
+    def __init__(self, api: ConfluenceSession, options: DocumentOptions) -> None:
         super().__init__(SynchronizingProcessorFactory(api, options))