markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/METADATA +24 -11
- markdown_to_confluence-0.3.5.dist-info/RECORD +23 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/WHEEL +1 -1
- md2conf/__init__.py +1 -1
- md2conf/__main__.py +6 -5
- md2conf/api.py +235 -45
- md2conf/application.py +100 -182
- md2conf/converter.py +53 -112
- md2conf/local.py +125 -0
- md2conf/matcher.py +54 -13
- md2conf/mermaid.py +10 -4
- md2conf/metadata.py +42 -0
- md2conf/processor.py +158 -90
- md2conf/scanner.py +117 -0
- markdown_to_confluence-0.3.3.dist-info/RECORD +0 -20
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/zip-safe +0 -0
md2conf/application.py
CHANGED
|
@@ -8,7 +8,6 @@ Copyright 2022-2025, Levente Hunyadi
|
|
|
8
8
|
|
|
9
9
|
import hashlib
|
|
10
10
|
import logging
|
|
11
|
-
import os
|
|
12
11
|
from pathlib import Path
|
|
13
12
|
from typing import Optional
|
|
14
13
|
|
|
@@ -16,213 +15,78 @@ from .api import ConfluencePage, ConfluenceSession
|
|
|
16
15
|
from .converter import (
|
|
17
16
|
ConfluenceDocument,
|
|
18
17
|
ConfluenceDocumentOptions,
|
|
19
|
-
|
|
20
|
-
ConfluenceQualifiedID,
|
|
21
|
-
ConfluenceSiteMetadata,
|
|
18
|
+
ConfluencePageID,
|
|
22
19
|
attachment_name,
|
|
23
|
-
extract_frontmatter_title,
|
|
24
|
-
extract_qualified_id,
|
|
25
|
-
read_qualified_id,
|
|
26
20
|
)
|
|
27
|
-
from .
|
|
28
|
-
from .
|
|
21
|
+
from .metadata import ConfluencePageMetadata
|
|
22
|
+
from .processor import Converter, Processor, ProcessorFactory
|
|
23
|
+
from .properties import PageError
|
|
24
|
+
from .scanner import Scanner
|
|
29
25
|
|
|
30
26
|
LOGGER = logging.getLogger(__name__)
|
|
31
27
|
|
|
32
28
|
|
|
33
|
-
class
|
|
34
|
-
"
|
|
29
|
+
class SynchronizingProcessor(Processor):
|
|
30
|
+
"""
|
|
31
|
+
Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
|
|
32
|
+
"""
|
|
35
33
|
|
|
36
34
|
api: ConfluenceSession
|
|
37
|
-
options: ConfluenceDocumentOptions
|
|
38
35
|
|
|
39
36
|
def __init__(
|
|
40
|
-
self, api: ConfluenceSession, options: ConfluenceDocumentOptions
|
|
41
|
-
) -> None:
|
|
42
|
-
self.api = api
|
|
43
|
-
self.options = options
|
|
44
|
-
|
|
45
|
-
def synchronize(self, path: Path) -> None:
|
|
46
|
-
"Synchronizes a single Markdown page or a directory of Markdown pages."
|
|
47
|
-
|
|
48
|
-
path = path.resolve(True)
|
|
49
|
-
if path.is_dir():
|
|
50
|
-
self.synchronize_directory(path)
|
|
51
|
-
elif path.is_file():
|
|
52
|
-
self.synchronize_page(path)
|
|
53
|
-
else:
|
|
54
|
-
raise ArgumentError(f"expected: valid file or directory path; got: {path}")
|
|
55
|
-
|
|
56
|
-
def synchronize_page(
|
|
57
|
-
self, page_path: Path, root_dir: Optional[Path] = None
|
|
58
|
-
) -> None:
|
|
59
|
-
"Synchronizes a single Markdown page with Confluence."
|
|
60
|
-
|
|
61
|
-
page_path = page_path.resolve(True)
|
|
62
|
-
if root_dir is None:
|
|
63
|
-
root_dir = page_path.parent
|
|
64
|
-
else:
|
|
65
|
-
root_dir = root_dir.resolve(True)
|
|
66
|
-
|
|
67
|
-
self._synchronize_page(page_path, root_dir, {})
|
|
68
|
-
|
|
69
|
-
def synchronize_directory(
|
|
70
|
-
self, local_dir: Path, root_dir: Optional[Path] = None
|
|
71
|
-
) -> None:
|
|
72
|
-
"Synchronizes a directory of Markdown pages with Confluence."
|
|
73
|
-
|
|
74
|
-
local_dir = local_dir.resolve(True)
|
|
75
|
-
if root_dir is None:
|
|
76
|
-
root_dir = local_dir
|
|
77
|
-
else:
|
|
78
|
-
root_dir = root_dir.resolve(True)
|
|
79
|
-
|
|
80
|
-
LOGGER.info("Synchronizing directory: %s", local_dir)
|
|
81
|
-
|
|
82
|
-
# Step 1: build index of all page metadata
|
|
83
|
-
page_metadata: dict[Path, ConfluencePageMetadata] = {}
|
|
84
|
-
root_id = (
|
|
85
|
-
ConfluenceQualifiedID(self.options.root_page_id, self.api.space_key)
|
|
86
|
-
if self.options.root_page_id
|
|
87
|
-
else None
|
|
88
|
-
)
|
|
89
|
-
self._index_directory(local_dir, root_dir, root_id, page_metadata)
|
|
90
|
-
LOGGER.info("Indexed %d page(s)", len(page_metadata))
|
|
91
|
-
|
|
92
|
-
# Step 2: convert each page
|
|
93
|
-
for page_path in page_metadata.keys():
|
|
94
|
-
self._synchronize_page(page_path, root_dir, page_metadata)
|
|
95
|
-
|
|
96
|
-
def _synchronize_page(
|
|
97
|
-
self,
|
|
98
|
-
page_path: Path,
|
|
99
|
-
root_dir: Path,
|
|
100
|
-
page_metadata: dict[Path, ConfluencePageMetadata],
|
|
101
|
-
) -> None:
|
|
102
|
-
base_path = page_path.parent
|
|
103
|
-
|
|
104
|
-
LOGGER.info("Synchronizing page: %s", page_path)
|
|
105
|
-
site_metadata = ConfluenceSiteMetadata(
|
|
106
|
-
domain=self.api.domain,
|
|
107
|
-
base_path=self.api.base_path,
|
|
108
|
-
space_key=self.api.space_key,
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
document = ConfluenceDocument.create(
|
|
112
|
-
page_path, self.options, root_dir, site_metadata, page_metadata
|
|
113
|
-
)
|
|
114
|
-
self._update_document(document, base_path)
|
|
115
|
-
|
|
116
|
-
def _index_directory(
|
|
117
|
-
self,
|
|
118
|
-
local_dir: Path,
|
|
119
|
-
root_dir: Path,
|
|
120
|
-
root_id: Optional[ConfluenceQualifiedID],
|
|
121
|
-
page_metadata: dict[Path, ConfluencePageMetadata],
|
|
37
|
+
self, api: ConfluenceSession, options: ConfluenceDocumentOptions, root_dir: Path
|
|
122
38
|
) -> None:
|
|
123
|
-
"
|
|
124
|
-
|
|
125
|
-
LOGGER.info("Indexing directory: %s", local_dir)
|
|
126
|
-
|
|
127
|
-
matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)
|
|
128
|
-
|
|
129
|
-
files: list[Path] = []
|
|
130
|
-
directories: list[Path] = []
|
|
131
|
-
for entry in os.scandir(local_dir):
|
|
132
|
-
if matcher.is_excluded(entry.name, entry.is_dir()):
|
|
133
|
-
continue
|
|
134
|
-
|
|
135
|
-
if entry.is_file():
|
|
136
|
-
files.append(Path(local_dir) / entry.name)
|
|
137
|
-
elif entry.is_dir():
|
|
138
|
-
directories.append(Path(local_dir) / entry.name)
|
|
139
|
-
|
|
140
|
-
# make page act as parent node in Confluence
|
|
141
|
-
parent_doc: Optional[Path] = None
|
|
142
|
-
if (Path(local_dir) / "index.md") in files:
|
|
143
|
-
parent_doc = Path(local_dir) / "index.md"
|
|
144
|
-
elif (Path(local_dir) / "README.md") in files:
|
|
145
|
-
parent_doc = Path(local_dir) / "README.md"
|
|
146
|
-
elif (Path(local_dir) / f"{local_dir.name}.md") in files:
|
|
147
|
-
parent_doc = Path(local_dir) / f"{local_dir.name}.md"
|
|
148
|
-
|
|
149
|
-
if parent_doc is None and self.options.keep_hierarchy:
|
|
150
|
-
parent_doc = Path(local_dir) / "index.md"
|
|
151
|
-
|
|
152
|
-
# create a blank page in Confluence for the directory entry
|
|
153
|
-
with open(parent_doc, "w"):
|
|
154
|
-
pass
|
|
155
|
-
|
|
156
|
-
if parent_doc is not None:
|
|
157
|
-
files.remove(parent_doc)
|
|
158
|
-
|
|
159
|
-
metadata = self._get_or_create_page(parent_doc, root_dir, root_id)
|
|
160
|
-
LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
|
|
161
|
-
page_metadata[parent_doc] = metadata
|
|
162
|
-
|
|
163
|
-
parent_id = read_qualified_id(parent_doc) or root_id
|
|
164
|
-
else:
|
|
165
|
-
parent_id = root_id
|
|
39
|
+
"""
|
|
40
|
+
Initializes a new processor instance.
|
|
166
41
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
42
|
+
:param api: Holds information about an open session to a Confluence server.
|
|
43
|
+
:param options: Options that control the generated page content.
|
|
44
|
+
:param root_dir: File system directory that acts as topmost root node.
|
|
45
|
+
"""
|
|
171
46
|
|
|
172
|
-
|
|
173
|
-
|
|
47
|
+
super().__init__(options, api.site, root_dir)
|
|
48
|
+
self.api = api
|
|
174
49
|
|
|
175
50
|
def _get_or_create_page(
|
|
176
|
-
self,
|
|
177
|
-
absolute_path: Path,
|
|
178
|
-
root_dir: Path,
|
|
179
|
-
parent_id: Optional[ConfluenceQualifiedID],
|
|
180
|
-
*,
|
|
181
|
-
title: Optional[str] = None,
|
|
51
|
+
self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
|
|
182
52
|
) -> ConfluencePageMetadata:
|
|
183
53
|
"""
|
|
184
54
|
Creates a new Confluence page if no page is linked in the Markdown document.
|
|
185
55
|
"""
|
|
186
56
|
|
|
187
57
|
# parse file
|
|
188
|
-
|
|
189
|
-
document = f.read()
|
|
190
|
-
|
|
191
|
-
qualified_id, document = extract_qualified_id(document)
|
|
58
|
+
document = Scanner().read(absolute_path)
|
|
192
59
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
60
|
+
overwrite = False
|
|
61
|
+
if document.page_id is None:
|
|
62
|
+
# create new Confluence page
|
|
196
63
|
if parent_id is None:
|
|
197
64
|
raise PageError(
|
|
198
65
|
f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
|
|
199
66
|
)
|
|
200
67
|
|
|
201
|
-
# assign title from front-matter if present
|
|
202
|
-
if title is None:
|
|
203
|
-
title, _ = extract_frontmatter_title(document)
|
|
204
|
-
|
|
205
68
|
# use file name (without extension) and path hash if no title is supplied
|
|
206
|
-
if title is None:
|
|
207
|
-
|
|
69
|
+
if document.title is not None:
|
|
70
|
+
title = document.title
|
|
71
|
+
else:
|
|
72
|
+
overwrite = True
|
|
73
|
+
relative_path = absolute_path.relative_to(self.root_dir)
|
|
208
74
|
hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
|
|
209
75
|
digest = "".join(f"{c:x}" for c in hash.digest())
|
|
210
76
|
title = f"{absolute_path.stem} [{digest}]"
|
|
211
77
|
|
|
212
78
|
confluence_page = self._create_page(
|
|
213
|
-
absolute_path, document, title, parent_id
|
|
79
|
+
absolute_path, document.text, title, parent_id
|
|
214
80
|
)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
self.api.
|
|
218
|
-
if confluence_page.space_id
|
|
219
|
-
else self.api.space_key
|
|
220
|
-
)
|
|
81
|
+
else:
|
|
82
|
+
# look up existing Confluence page
|
|
83
|
+
confluence_page = self.api.get_page(document.page_id)
|
|
221
84
|
|
|
222
85
|
return ConfluencePageMetadata(
|
|
223
86
|
page_id=confluence_page.id,
|
|
224
|
-
space_key=
|
|
225
|
-
title=confluence_page.title
|
|
87
|
+
space_key=self.api.space_id_to_key(confluence_page.space_id),
|
|
88
|
+
title=confluence_page.title,
|
|
89
|
+
overwrite=overwrite,
|
|
226
90
|
)
|
|
227
91
|
|
|
228
92
|
def _create_page(
|
|
@@ -230,13 +94,13 @@ class Application:
|
|
|
230
94
|
absolute_path: Path,
|
|
231
95
|
document: str,
|
|
232
96
|
title: str,
|
|
233
|
-
parent_id:
|
|
97
|
+
parent_id: ConfluencePageID,
|
|
234
98
|
) -> ConfluencePage:
|
|
235
|
-
"
|
|
99
|
+
"""
|
|
100
|
+
Creates a new Confluence page when Markdown file doesn't have an embedded page ID yet.
|
|
101
|
+
"""
|
|
236
102
|
|
|
237
|
-
confluence_page = self.api.get_or_create_page(
|
|
238
|
-
title, parent_id.page_id, space_key=parent_id.space_key
|
|
239
|
-
)
|
|
103
|
+
confluence_page = self.api.get_or_create_page(title, parent_id.page_id)
|
|
240
104
|
self._update_markdown(
|
|
241
105
|
absolute_path,
|
|
242
106
|
document,
|
|
@@ -245,26 +109,52 @@ class Application:
|
|
|
245
109
|
)
|
|
246
110
|
return confluence_page
|
|
247
111
|
|
|
248
|
-
def
|
|
249
|
-
|
|
112
|
+
def _save_document(
|
|
113
|
+
self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
|
|
114
|
+
) -> None:
|
|
115
|
+
"""
|
|
116
|
+
Saves a new version of a Confluence document.
|
|
117
|
+
|
|
118
|
+
Invokes Confluence REST API to persist the new version.
|
|
119
|
+
"""
|
|
250
120
|
|
|
121
|
+
base_path = path.parent
|
|
251
122
|
for image in document.images:
|
|
252
123
|
self.api.upload_attachment(
|
|
253
|
-
|
|
124
|
+
page_id.page_id,
|
|
254
125
|
attachment_name(image),
|
|
255
126
|
attachment_path=base_path / image,
|
|
256
127
|
)
|
|
257
128
|
|
|
258
129
|
for name, data in document.embedded_images.items():
|
|
259
130
|
self.api.upload_attachment(
|
|
260
|
-
|
|
131
|
+
page_id.page_id,
|
|
261
132
|
name,
|
|
262
133
|
raw_data=data,
|
|
263
134
|
)
|
|
264
135
|
|
|
265
136
|
content = document.xhtml()
|
|
266
137
|
LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
|
|
267
|
-
|
|
138
|
+
|
|
139
|
+
title = None
|
|
140
|
+
if document.title is not None:
|
|
141
|
+
meta = self.page_metadata[path]
|
|
142
|
+
|
|
143
|
+
# update title only for pages with randomly assigned title
|
|
144
|
+
if meta.overwrite:
|
|
145
|
+
conflicting_page_id = self.api.page_exists(
|
|
146
|
+
document.title, space_id=self.api.space_key_to_id(meta.space_key)
|
|
147
|
+
)
|
|
148
|
+
if conflicting_page_id is None:
|
|
149
|
+
title = document.title
|
|
150
|
+
else:
|
|
151
|
+
LOGGER.info(
|
|
152
|
+
"Document title of %s conflicts with Confluence page title of %s",
|
|
153
|
+
path,
|
|
154
|
+
conflicting_page_id,
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
self.api.update_page(page_id.page_id, content, title=title)
|
|
268
158
|
|
|
269
159
|
def _update_markdown(
|
|
270
160
|
self,
|
|
@@ -273,7 +163,9 @@ class Application:
|
|
|
273
163
|
page_id: str,
|
|
274
164
|
space_key: Optional[str],
|
|
275
165
|
) -> None:
|
|
276
|
-
"
|
|
166
|
+
"""
|
|
167
|
+
Writes the Confluence page ID and space key at the beginning of the Markdown file.
|
|
168
|
+
"""
|
|
277
169
|
|
|
278
170
|
content: list[str] = []
|
|
279
171
|
|
|
@@ -293,3 +185,29 @@ class Application:
|
|
|
293
185
|
|
|
294
186
|
with open(path, "w", encoding="utf-8") as file:
|
|
295
187
|
file.write("\n".join(content))
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class SynchronizingProcessorFactory(ProcessorFactory):
|
|
191
|
+
api: ConfluenceSession
|
|
192
|
+
|
|
193
|
+
def __init__(
|
|
194
|
+
self, api: ConfluenceSession, options: ConfluenceDocumentOptions
|
|
195
|
+
) -> None:
|
|
196
|
+
super().__init__(options, api.site)
|
|
197
|
+
self.api = api
|
|
198
|
+
|
|
199
|
+
def create(self, root_dir: Path) -> Processor:
|
|
200
|
+
return SynchronizingProcessor(self.api, self.options, root_dir)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class Application(Converter):
|
|
204
|
+
"""
|
|
205
|
+
The entry point for Markdown to Confluence conversion.
|
|
206
|
+
|
|
207
|
+
This is the class instantiated by the command-line application.
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
def __init__(
|
|
211
|
+
self, api: ConfluenceSession, options: ConfluenceDocumentOptions
|
|
212
|
+
) -> None:
|
|
213
|
+
super().__init__(SynchronizingProcessorFactory(api, options))
|
md2conf/converter.py
CHANGED
|
@@ -18,15 +18,16 @@ import xml.etree.ElementTree
|
|
|
18
18
|
from dataclasses import dataclass
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
from typing import Any, Literal, Optional, Union
|
|
21
|
-
from urllib.parse import ParseResult, urlparse, urlunparse
|
|
21
|
+
from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
|
|
22
22
|
|
|
23
23
|
import lxml.etree as ET
|
|
24
24
|
import markdown
|
|
25
|
-
import yaml
|
|
26
25
|
from lxml.builder import ElementMaker
|
|
27
26
|
|
|
28
27
|
from .mermaid import render_diagram
|
|
28
|
+
from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
|
|
29
29
|
from .properties import PageError
|
|
30
|
+
from .scanner import ScannedDocument, Scanner
|
|
30
31
|
|
|
31
32
|
namespaces = {
|
|
32
33
|
"ac": "http://atlassian.com/content",
|
|
@@ -65,6 +66,19 @@ def is_relative_url(url: str) -> bool:
|
|
|
65
66
|
return not bool(urlparts.scheme) and not bool(urlparts.netloc)
|
|
66
67
|
|
|
67
68
|
|
|
69
|
+
def encode_title(text: str) -> str:
|
|
70
|
+
"Converts a title string such that it is safe to embed into a Confluence URL."
|
|
71
|
+
|
|
72
|
+
# replace unsafe characters with space
|
|
73
|
+
text = re.sub(r"[^A-Za-z0-9._~()'!*:@,;+?-]+", " ", text)
|
|
74
|
+
|
|
75
|
+
# replace multiple consecutive spaces with single space
|
|
76
|
+
text = re.sub(r"\s\s+", " ", text)
|
|
77
|
+
|
|
78
|
+
# URL-encode
|
|
79
|
+
return quote_plus(text.strip())
|
|
80
|
+
|
|
81
|
+
|
|
68
82
|
def emoji_generator(
|
|
69
83
|
index: str,
|
|
70
84
|
shortname: str,
|
|
@@ -142,8 +156,8 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
|
|
|
142
156
|
|
|
143
157
|
try:
|
|
144
158
|
return ET.fromstringlist(data, parser=parser)
|
|
145
|
-
except ET.XMLSyntaxError as
|
|
146
|
-
raise ParseError(
|
|
159
|
+
except ET.XMLSyntaxError as ex:
|
|
160
|
+
raise ParseError() from ex
|
|
147
161
|
|
|
148
162
|
|
|
149
163
|
def elements_from_strings(items: list[str]) -> ET._Element:
|
|
@@ -240,20 +254,6 @@ _languages = [
|
|
|
240
254
|
]
|
|
241
255
|
|
|
242
256
|
|
|
243
|
-
@dataclass
|
|
244
|
-
class ConfluenceSiteMetadata:
|
|
245
|
-
domain: str
|
|
246
|
-
base_path: str
|
|
247
|
-
space_key: Optional[str]
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
@dataclass
|
|
251
|
-
class ConfluencePageMetadata:
|
|
252
|
-
page_id: str
|
|
253
|
-
space_key: Optional[str]
|
|
254
|
-
title: str
|
|
255
|
-
|
|
256
|
-
|
|
257
257
|
class NodeVisitor:
|
|
258
258
|
def visit(self, node: ET._Element) -> None:
|
|
259
259
|
"Recursively visits all descendants of this node."
|
|
@@ -479,7 +479,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
|
|
|
479
479
|
"Confluence space key required for building full web URLs"
|
|
480
480
|
)
|
|
481
481
|
|
|
482
|
-
page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{link_metadata.title}"
|
|
482
|
+
page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
|
|
483
483
|
|
|
484
484
|
components = ParseResult(
|
|
485
485
|
scheme="https",
|
|
@@ -962,70 +962,15 @@ class DocumentError(RuntimeError):
|
|
|
962
962
|
"Raised when a converted Markdown document has an unexpected element or attribute."
|
|
963
963
|
|
|
964
964
|
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
def _repl_func(matchobj: re.Match) -> str:
|
|
969
|
-
values.append(matchobj.group(1))
|
|
970
|
-
return ""
|
|
971
|
-
|
|
972
|
-
text = re.sub(pattern, _repl_func, text, 1, re.ASCII)
|
|
973
|
-
value = values[0] if values else None
|
|
974
|
-
return value, text
|
|
965
|
+
@dataclass
|
|
966
|
+
class ConfluencePageID:
|
|
967
|
+
page_id: str
|
|
975
968
|
|
|
976
969
|
|
|
977
970
|
@dataclass
|
|
978
971
|
class ConfluenceQualifiedID:
|
|
979
972
|
page_id: str
|
|
980
|
-
space_key:
|
|
981
|
-
|
|
982
|
-
def __init__(self, page_id: str, space_key: Optional[str] = None):
|
|
983
|
-
self.page_id = page_id
|
|
984
|
-
self.space_key = space_key
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
def extract_qualified_id(text: str) -> tuple[Optional[ConfluenceQualifiedID], str]:
|
|
988
|
-
"Extracts the Confluence page ID and space key from a Markdown document."
|
|
989
|
-
|
|
990
|
-
page_id, text = extract_value(r"<!--\s+confluence-page-id:\s*(\d+)\s+-->", text)
|
|
991
|
-
|
|
992
|
-
if page_id is None:
|
|
993
|
-
return None, text
|
|
994
|
-
|
|
995
|
-
# extract Confluence space key
|
|
996
|
-
space_key, text = extract_value(r"<!--\s+confluence-space-key:\s*(\S+)\s+-->", text)
|
|
997
|
-
|
|
998
|
-
return ConfluenceQualifiedID(page_id, space_key), text
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
def extract_frontmatter(text: str) -> tuple[Optional[str], str]:
|
|
1002
|
-
"Extracts the front matter from a Markdown document."
|
|
1003
|
-
|
|
1004
|
-
return extract_value(r"(?ms)\A---$(.+?)^---$", text)
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
def extract_frontmatter_title(text: str) -> tuple[Optional[str], str]:
|
|
1008
|
-
frontmatter, text = extract_frontmatter(text)
|
|
1009
|
-
|
|
1010
|
-
title: Optional[str] = None
|
|
1011
|
-
if frontmatter is not None:
|
|
1012
|
-
properties = yaml.safe_load(frontmatter)
|
|
1013
|
-
if isinstance(properties, dict):
|
|
1014
|
-
property_title = properties.get("title")
|
|
1015
|
-
if isinstance(property_title, str):
|
|
1016
|
-
title = property_title
|
|
1017
|
-
|
|
1018
|
-
return title, text
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
def read_qualified_id(absolute_path: Path) -> Optional[ConfluenceQualifiedID]:
|
|
1022
|
-
"Reads the Confluence page ID and space key from a Markdown document."
|
|
1023
|
-
|
|
1024
|
-
with open(absolute_path, "r", encoding="utf-8") as f:
|
|
1025
|
-
document = f.read()
|
|
1026
|
-
|
|
1027
|
-
qualified_id, _ = extract_qualified_id(document)
|
|
1028
|
-
return qualified_id
|
|
973
|
+
space_key: str
|
|
1029
974
|
|
|
1030
975
|
|
|
1031
976
|
@dataclass
|
|
@@ -1048,15 +993,18 @@ class ConfluenceDocumentOptions:
|
|
|
1048
993
|
ignore_invalid_url: bool = False
|
|
1049
994
|
heading_anchors: bool = False
|
|
1050
995
|
generated_by: Optional[str] = "This page has been generated with a tool."
|
|
1051
|
-
root_page_id: Optional[
|
|
996
|
+
root_page_id: Optional[ConfluencePageID] = None
|
|
1052
997
|
keep_hierarchy: bool = False
|
|
1053
998
|
render_mermaid: bool = False
|
|
1054
999
|
diagram_output_format: Literal["png", "svg"] = "png"
|
|
1055
1000
|
webui_links: bool = False
|
|
1056
1001
|
|
|
1057
1002
|
|
|
1003
|
+
class ConversionError(RuntimeError):
|
|
1004
|
+
"Raised when a Markdown document cannot be converted to Confluence Storage Format."
|
|
1005
|
+
|
|
1006
|
+
|
|
1058
1007
|
class ConfluenceDocument:
|
|
1059
|
-
id: ConfluenceQualifiedID
|
|
1060
1008
|
title: Optional[str]
|
|
1061
1009
|
links: list[str]
|
|
1062
1010
|
images: list[Path]
|
|
@@ -1072,67 +1020,61 @@ class ConfluenceDocument:
|
|
|
1072
1020
|
root_dir: Path,
|
|
1073
1021
|
site_metadata: ConfluenceSiteMetadata,
|
|
1074
1022
|
page_metadata: dict[Path, ConfluencePageMetadata],
|
|
1075
|
-
) -> "ConfluenceDocument":
|
|
1023
|
+
) -> tuple[ConfluencePageID, "ConfluenceDocument"]:
|
|
1076
1024
|
path = path.resolve(True)
|
|
1077
1025
|
|
|
1078
|
-
|
|
1079
|
-
text = f.read()
|
|
1026
|
+
document = Scanner().read(path)
|
|
1080
1027
|
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1028
|
+
if document.page_id is not None:
|
|
1029
|
+
page_id = ConfluencePageID(document.page_id)
|
|
1030
|
+
else:
|
|
1084
1031
|
# look up Confluence page ID in metadata
|
|
1085
1032
|
metadata = page_metadata.get(path)
|
|
1086
1033
|
if metadata is not None:
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
)
|
|
1090
|
-
if qualified_id is None:
|
|
1091
|
-
raise PageError("missing Confluence page ID")
|
|
1034
|
+
page_id = ConfluencePageID(metadata.page_id)
|
|
1035
|
+
else:
|
|
1036
|
+
raise PageError("missing Confluence page ID")
|
|
1092
1037
|
|
|
1093
|
-
return ConfluenceDocument(
|
|
1094
|
-
path,
|
|
1038
|
+
return page_id, ConfluenceDocument(
|
|
1039
|
+
path, document, options, root_dir, site_metadata, page_metadata
|
|
1095
1040
|
)
|
|
1096
1041
|
|
|
1097
1042
|
def __init__(
|
|
1098
1043
|
self,
|
|
1099
1044
|
path: Path,
|
|
1100
|
-
|
|
1101
|
-
qualified_id: ConfluenceQualifiedID,
|
|
1045
|
+
document: ScannedDocument,
|
|
1102
1046
|
options: ConfluenceDocumentOptions,
|
|
1103
1047
|
root_dir: Path,
|
|
1104
1048
|
site_metadata: ConfluenceSiteMetadata,
|
|
1105
1049
|
page_metadata: dict[Path, ConfluencePageMetadata],
|
|
1106
1050
|
) -> None:
|
|
1107
1051
|
self.options = options
|
|
1108
|
-
self.id = qualified_id
|
|
1109
|
-
|
|
1110
|
-
# extract 'generated-by' tag text
|
|
1111
|
-
generated_by_tag, text = extract_value(
|
|
1112
|
-
r"<!--\s+generated-by:\s*(.*)\s+-->", text
|
|
1113
|
-
)
|
|
1114
|
-
|
|
1115
|
-
# extract frontmatter
|
|
1116
|
-
self.title, text = extract_frontmatter_title(text)
|
|
1117
1052
|
|
|
1118
1053
|
# convert to HTML
|
|
1119
|
-
html = markdown_to_html(text)
|
|
1054
|
+
html = markdown_to_html(document.text)
|
|
1120
1055
|
|
|
1121
1056
|
# parse Markdown document
|
|
1122
1057
|
if self.options.generated_by is not None:
|
|
1123
|
-
generated_by = self.options.generated_by
|
|
1124
|
-
|
|
1125
|
-
|
|
1058
|
+
generated_by = document.generated_by or self.options.generated_by
|
|
1059
|
+
else:
|
|
1060
|
+
generated_by = None
|
|
1061
|
+
|
|
1062
|
+
if generated_by is not None:
|
|
1063
|
+
generated_by_html = markdown_to_html(generated_by)
|
|
1126
1064
|
|
|
1127
1065
|
content = [
|
|
1128
1066
|
'<ac:structured-macro ac:name="info" ac:schema-version="1">',
|
|
1129
|
-
f"<ac:rich-text-body
|
|
1067
|
+
f"<ac:rich-text-body>{generated_by_html}</ac:rich-text-body>",
|
|
1130
1068
|
"</ac:structured-macro>",
|
|
1131
1069
|
html,
|
|
1132
1070
|
]
|
|
1133
1071
|
else:
|
|
1134
1072
|
content = [html]
|
|
1135
|
-
|
|
1073
|
+
|
|
1074
|
+
try:
|
|
1075
|
+
self.root = elements_from_strings(content)
|
|
1076
|
+
except ParseError as ex:
|
|
1077
|
+
raise ConversionError(path) from ex
|
|
1136
1078
|
|
|
1137
1079
|
converter = ConfluenceStorageFormatConverter(
|
|
1138
1080
|
ConfluenceConverterOptions(
|
|
@@ -1152,8 +1094,7 @@ class ConfluenceDocument:
|
|
|
1152
1094
|
self.images = converter.images
|
|
1153
1095
|
self.embedded_images = converter.embedded_images
|
|
1154
1096
|
|
|
1155
|
-
|
|
1156
|
-
self.title = converter.toc.get_title()
|
|
1097
|
+
self.title = document.title or converter.toc.get_title()
|
|
1157
1098
|
|
|
1158
1099
|
def xhtml(self) -> str:
|
|
1159
1100
|
return elements_to_string(self.root)
|