markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/local.py ADDED
@@ -0,0 +1,125 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ import hashlib
10
+ import logging
11
+ import os
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
16
+ from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
17
+ from .processor import Converter, Processor, ProcessorFactory
18
+ from .properties import PageError
19
+ from .scanner import Scanner
20
+
21
+ LOGGER = logging.getLogger(__name__)
22
+
23
+
24
class LocalProcessor(Processor):
    """
    Transforms a single Markdown page or a directory of Markdown pages into Confluence Storage Format (CSF) documents.

    Generated documents are written to the local file system as `.csf` files.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        *,
        out_dir: Optional[Path],
        root_dir: Path,
    ) -> None:
        """
        Initializes a new processor instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        :param out_dir: File system directory to write generated CSF documents to. Falls back to `root_dir` when omitted.
        :param root_dir: File system directory that acts as topmost root node.
        """

        super().__init__(options, site, root_dir)
        self.out_dir = out_dir or root_dir

    def _get_or_create_page(
        self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
    ) -> ConfluencePageMetadata:
        """
        Extracts metadata from a Markdown file.

        When the document declares a page ID, that ID is used as-is. Otherwise, a synthetic identifier is derived
        from the MD5 digest of the document text.

        :param absolute_path: Path to the Markdown file to read.
        :param parent_id: Identifier of the parent page; required when the document has no linked page.
        :returns: Page metadata with an empty title and `overwrite` enabled.
        :raises PageError: If the document has no page ID and no parent page ID is available.
        """

        # parse file
        document = Scanner().read(absolute_path)
        if document.page_id is not None:
            page_id = document.page_id
            space_key = document.space_key or self.site.space_key or "HOME"
        else:
            if parent_id is None:
                raise PageError(
                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
                )

            # `hexdigest` zero-pads each byte to two hex digits; concatenating unpadded digits would let
            # different digests collapse to the same identifier (e.g. bytes 01 23 and 12 03 both give "123")
            digest = hashlib.md5(document.text.encode("utf-8")).hexdigest()
            LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
            page_id = digest
            space_key = self.site.space_key or "HOME"

        return ConfluencePageMetadata(
            page_id=page_id,
            space_key=space_key,
            title="",
            overwrite=True,
        )

    def _save_document(
        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
    ) -> None:
        """
        Writes the converted document to a `.csf` file in the output directory.

        The output file mirrors the location of the source file relative to the root directory.

        :param page_id: Identifier of the page the document belongs to (not used for local output).
        :param document: Converted document whose XHTML content is written.
        :param path: Path to the source Markdown file; must be located under the root directory.
        """

        content = document.xhtml()
        out_path = self.out_dir / path.relative_to(self.root_dir).with_suffix(".csf")

        # the output directory may not yet contain the sub-directory hierarchy of the source tree
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(content)
94
+
95
+
96
class LocalProcessorFactory(ProcessorFactory):
    """
    Creates `LocalProcessor` instances that share the same options, site metadata and output directory.
    """

    out_dir: Optional[Path]

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        """
        Initializes a new factory instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        :param out_dir: File system directory passed on to each processor as output directory.
        """

        super().__init__(options, site)
        self.out_dir = out_dir

    def create(self, root_dir: Path) -> Processor:
        """
        Creates a processor that treats the specified directory as topmost root node.

        :param root_dir: File system directory that acts as topmost root node.
        :returns: A new `LocalProcessor` instance.
        """

        processor = LocalProcessor(
            self.options,
            self.site,
            root_dir=root_dir,
            out_dir=self.out_dir,
        )
        return processor
112
+
113
+
114
class LocalConverter(Converter):
    """
    The entry point for Markdown to Confluence conversion.

    Wires a `LocalProcessorFactory` into the generic converter.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        """
        Initializes a new converter instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        :param out_dir: File system directory to write generated CSF documents to.
        """

        factory = LocalProcessorFactory(options, site, out_dir)
        super().__init__(factory)
md2conf/matcher.py CHANGED
@@ -10,15 +10,15 @@ import os.path
10
10
  from dataclasses import dataclass
11
11
  from fnmatch import fnmatch
12
12
  from pathlib import Path
13
- from typing import Iterable, Optional
13
+ from typing import Iterable, Optional, Union, overload
14
14
 
15
15
 
16
- @dataclass
16
+ @dataclass(frozen=True)
17
17
  class Entry:
18
18
  """
19
19
  Represents a file or directory entry.
20
20
 
21
- :param name: Name of the file-system entry.
21
+ :param name: Name of the file-system entry to match against the rule-set.
22
22
  :param is_dir: True if the entry is a directory.
23
23
  """
24
24
 
@@ -43,6 +43,15 @@ class MatcherOptions:
43
43
  self.extension = f".{self.extension}"
44
44
 
45
45
 
46
def _entry_name_dir(entry: Union[Entry, os.DirEntry[str]]) -> tuple[str, bool]:
    """
    Extracts the name and the directory flag from a file-system entry.

    :param entry: Either an `Entry` data-class object or an object returned by `scandir`.
    :returns: A tuple of the entry name and whether the entry is a directory.
    :raises NotImplementedError: If the argument is of neither supported type.
    """

    # `Entry` stores the flag as an attribute, whereas `os.DirEntry` exposes it as a method
    if isinstance(entry, Entry):
        is_dir = entry.is_dir
    elif isinstance(entry, os.DirEntry):
        is_dir = entry.is_dir()
    else:
        raise NotImplementedError("type match not exhaustive")

    return entry.name, is_dir
53
+
54
+
46
55
  class Matcher:
47
56
  "Compares file and directory names against a list of exclude/include patterns."
48
57
 
@@ -58,20 +67,40 @@ class Matcher:
58
67
  else:
59
68
  self.rules = []
60
69
 
70
+ for rule in self.rules:
71
+ if "/" in rule or os.path.sep in rule:
72
+ raise ValueError(f"nested matching not supported: {rule}")
73
+
61
74
  def extension_matches(self, name: str) -> bool:
62
75
  "True if the file name has the expected extension."
63
76
 
64
77
  return self.options.extension is None or name.endswith(self.options.extension)
65
78
 
66
- def is_excluded(self, name: str, is_dir: bool) -> bool:
79
+ @overload
80
+ def is_excluded(self, entry: Entry) -> bool:
81
+ """
82
+ True if the file or directory name matches any of the exclusion patterns.
83
+
84
+ :param entry: A data-class object.
85
+ :returns: True if the name matches at least one of the exclusion patterns.
86
+ """
87
+
88
+ ...
89
+
90
+ @overload
91
+ def is_excluded(self, entry: os.DirEntry[str]) -> bool:
67
92
  """
68
93
  True if the file or directory name matches any of the exclusion patterns.
69
94
 
70
- :param name: Name to match against the rule-set.
71
- :param is_dir: Whether the name identifies a directory.
95
+ :param entry: An object returned by `scandir`.
72
96
  :returns: True if the name matches at least one of the exclusion patterns.
73
97
  """
74
98
 
99
+ ...
100
+
101
+ def is_excluded(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
102
+ name, is_dir = _entry_name_dir(entry)
103
+
75
104
  # skip hidden files and directories
76
105
  if name.startswith("."):
77
106
  return True
@@ -86,26 +115,38 @@ class Matcher:
86
115
  else:
87
116
  return False
88
117
 
89
- def is_included(self, name: str, is_dir: bool) -> bool:
118
+ @overload
119
+ def is_included(self, entry: Entry) -> bool:
120
+ """
121
+ True if the file or directory name matches none of the exclusion patterns.
122
+
123
+ :param entry: A data-class object.
124
+ :returns: True if the name doesn't match any of the exclusion patterns.
125
+ """
126
+ ...
127
+
128
+ @overload
129
+ def is_included(self, entry: os.DirEntry[str]) -> bool:
90
130
  """
91
131
  True if the file or directory name matches none of the exclusion patterns.
92
132
 
93
- :param name: Name to match against the rule-set.
94
- :param is_dir: Whether the name identifies a directory.
133
+ :param entry: An object returned by `scandir`.
95
134
  :returns: True if the name doesn't match any of the exclusion patterns.
96
135
  """
136
+ ...
97
137
 
98
- return not self.is_excluded(name, is_dir)
138
+ def is_included(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
139
+ return not self.is_excluded(entry)
99
140
 
100
- def filter(self, items: Iterable[Entry]) -> list[Entry]:
141
+ def filter(self, entries: Iterable[Entry]) -> list[Entry]:
101
142
  """
102
143
  Returns only those elements from the input that don't match any of the exclusion rules.
103
144
 
104
- :param items: A list of names to filter.
145
+ :param entries: A list of names to filter.
105
146
  :returns: A filtered list of names that didn't match any of the exclusion rules.
106
147
  """
107
148
 
108
- return [item for item in items if self.is_included(item.name, item.is_dir)]
149
+ return [entry for entry in entries if self.is_included(entry)]
109
150
 
110
151
  def scandir(self, path: Path) -> list[Entry]:
111
152
  """
md2conf/mermaid.py CHANGED
@@ -79,10 +79,16 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") ->
79
79
  )
80
80
  stdout, stderr = proc.communicate(input=source.encode("utf-8"))
81
81
  if proc.returncode:
82
- raise RuntimeError(
83
- f"failed to convert Mermaid diagram; exit code: {proc.returncode}, "
84
- f"output:\n{stdout.decode('utf-8')}\n{stderr.decode('utf-8')}"
85
- )
82
+ messages = [
83
+ f"failed to convert Mermaid diagram; exit code: {proc.returncode}"
84
+ ]
85
+ console_output = stdout.decode("utf-8")
86
+ if console_output:
87
+ messages.append(f"output:\n{console_output}")
88
+ console_error = stderr.decode("utf-8")
89
+ if console_error:
90
+ messages.append(f"error:\n{console_error}")
91
+ raise RuntimeError("\n".join(messages))
86
92
  with open(filename, "rb") as image:
87
93
  return image.read()
88
94
 
md2conf/metadata.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ Publish Markdown files to Confluence wiki.
3
+
4
+ Copyright 2022-2025, Levente Hunyadi
5
+
6
+ :see: https://github.com/hunyadi/md2conf
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+
12
+
13
@dataclass
class ConfluenceSiteMetadata:
    """
    Describes the Confluence wiki site that pages belong to.

    :param domain: Confluence organization domain (e.g. `levente-hunyadi.atlassian.net`).
    :param base_path: Base path for Confluence (default: `/wiki/`).
    :param space_key: Confluence space key for new pages (e.g. `~hunyadi` or `INST`), or `None` if not set.
    """

    domain: str
    base_path: str
    space_key: Optional[str]
26
+
27
+
28
@dataclass
class ConfluencePageMetadata:
    """
    Describes a single Confluence page.

    :param page_id: Confluence page ID.
    :param space_key: Confluence space key.
    :param title: Document title.
    :param overwrite: True if operations are allowed to update document properties (e.g. title).
    """

    page_id: str
    space_key: str
    title: str
    overwrite: bool
md2conf/processor.py CHANGED
@@ -6,101 +6,94 @@ Copyright 2022-2025, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
- import hashlib
10
9
  import logging
11
10
  import os
11
+ from abc import abstractmethod
12
12
  from pathlib import Path
13
13
  from typing import Optional
14
14
 
15
- from .converter import (
16
- ConfluenceDocument,
17
- ConfluenceDocumentOptions,
18
- ConfluencePageMetadata,
19
- ConfluenceQualifiedID,
20
- ConfluenceSiteMetadata,
21
- extract_qualified_id,
22
- )
15
+ from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
23
16
  from .matcher import Matcher, MatcherOptions
17
+ from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
24
18
  from .properties import ArgumentError
25
19
 
26
20
  LOGGER = logging.getLogger(__name__)
27
21
 
28
22
 
29
23
  class Processor:
24
+ """
25
+ Processes a single Markdown page or a directory of Markdown pages.
26
+ """
27
+
30
28
  options: ConfluenceDocumentOptions
31
- site_metadata: ConfluenceSiteMetadata
29
+ site: ConfluenceSiteMetadata
30
+ root_dir: Path
31
+
32
+ page_metadata: dict[Path, ConfluencePageMetadata]
32
33
 
33
34
  def __init__(
34
- self, options: ConfluenceDocumentOptions, site_metadata: ConfluenceSiteMetadata
35
+ self,
36
+ options: ConfluenceDocumentOptions,
37
+ site: ConfluenceSiteMetadata,
38
+ root_dir: Path,
35
39
  ) -> None:
36
40
  self.options = options
37
- self.site_metadata = site_metadata
41
+ self.site = site
42
+ self.root_dir = root_dir
38
43
 
39
- def process(self, path: Path) -> None:
40
- "Processes a single Markdown file or a directory of Markdown files."
44
+ self.page_metadata = {}
41
45
 
42
- path = path.resolve(True)
43
- if path.is_dir():
44
- self.process_directory(path)
45
- elif path.is_file():
46
- self.process_page(path)
47
- else:
48
- raise ArgumentError(f"expected: valid file or directory path; got: {path}")
49
-
50
- def process_directory(
51
- self, local_dir: Path, root_dir: Optional[Path] = None
52
- ) -> None:
53
- "Recursively scans a directory hierarchy for Markdown files."
46
+ def process_directory(self, local_dir: Path) -> None:
47
+ """
48
+ Recursively scans a directory hierarchy for Markdown files, and processes each, resolving cross-references.
49
+ """
54
50
 
55
51
  local_dir = local_dir.resolve(True)
56
- if root_dir is None:
57
- root_dir = local_dir
58
- else:
59
- root_dir = root_dir.resolve(True)
60
-
61
- LOGGER.info("Synchronizing directory: %s", local_dir)
52
+ LOGGER.info("Processing directory: %s", local_dir)
62
53
 
63
54
  # Step 1: build index of all page metadata
64
- page_metadata: dict[Path, ConfluencePageMetadata] = {}
65
- self._index_directory(local_dir, page_metadata)
66
- LOGGER.info("Indexed %d page(s)", len(page_metadata))
55
+ self._index_directory(local_dir, self.options.root_page_id)
56
+ LOGGER.info("Indexed %d page(s)", len(self.page_metadata))
67
57
 
68
58
  # Step 2: convert each page
69
- for page_path in page_metadata.keys():
70
- self._process_page(page_path, root_dir, page_metadata)
59
+ for page_path in self.page_metadata.keys():
60
+ self._process_page(page_path)
71
61
 
72
- def process_page(self, path: Path, root_dir: Optional[Path] = None) -> None:
73
- "Processes a single Markdown file."
62
+ def process_page(self, path: Path) -> None:
63
+ """
64
+ Processes a single Markdown file.
65
+ """
74
66
 
75
- path = path.resolve(True)
76
- if root_dir is None:
77
- root_dir = path.parent
78
- else:
79
- root_dir = root_dir.resolve(True)
80
-
81
- self._process_page(path, root_dir, {})
82
-
83
- def _process_page(
84
- self,
85
- path: Path,
86
- root_dir: Path,
87
- page_metadata: dict[Path, ConfluencePageMetadata],
88
- ) -> None:
89
- "Processes a single Markdown file."
67
+ LOGGER.info("Processing page: %s", path)
68
+ self._index_page(path, self.options.root_page_id)
69
+ self._process_page(path)
90
70
 
91
- document = ConfluenceDocument.create(
92
- path, self.options, root_dir, self.site_metadata, page_metadata
71
+ def _process_page(self, path: Path) -> None:
72
+ page_id, document = ConfluenceDocument.create(
73
+ path, self.options, self.root_dir, self.site, self.page_metadata
93
74
  )
94
- content = document.xhtml()
95
- with open(path.with_suffix(".csf"), "w", encoding="utf-8") as f:
96
- f.write(content)
75
+ self._save_document(page_id, document, path)
76
+
77
@abstractmethod
def _get_or_create_page(
    self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
) -> ConfluencePageMetadata:
    """
    Creates a new Confluence page if no page is linked in the Markdown document.

    Must be overridden by derived classes.

    :param absolute_path: Path to the Markdown file.
    :param parent_id: Identifier of the parent page, if any.
    :returns: Metadata for the page associated with the Markdown file.
    :raises NotImplementedError: If a derived class has not overridden this method.
    """

    # the enclosing class does not use `ABCMeta`, so `@abstractmethod` is not enforced at instantiation;
    # fail loudly instead of silently returning `None`
    raise NotImplementedError("derived classes must implement `_get_or_create_page`")
85
+
86
@abstractmethod
def _save_document(
    self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
) -> None:
    """
    Persists a converted document.

    Must be overridden by derived classes.

    :param page_id: Identifier of the page the document belongs to.
    :param document: Converted document to persist.
    :param path: Path to the source Markdown file.
    :raises NotImplementedError: If a derived class has not overridden this method.
    """

    # the enclosing class does not use `ABCMeta`, so `@abstractmethod` is not enforced at instantiation;
    # fail loudly instead of silently discarding the document
    raise NotImplementedError("derived classes must implement `_save_document`")
97
90
 
98
91
  def _index_directory(
99
- self,
100
- local_dir: Path,
101
- page_metadata: dict[Path, ConfluencePageMetadata],
92
+ self, local_dir: Path, parent_id: Optional[ConfluencePageID]
102
93
  ) -> None:
103
- "Indexes Markdown files in a directory recursively."
94
+ """
95
+ Indexes Markdown files in a directory hierarchy recursively.
96
+ """
104
97
 
105
98
  LOGGER.info("Indexing directory: %s", local_dir)
106
99
 
@@ -109,7 +102,7 @@ class Processor:
109
102
  files: list[Path] = []
110
103
  directories: list[Path] = []
111
104
  for entry in os.scandir(local_dir):
112
- if matcher.is_excluded(entry.name, entry.is_dir()):
105
+ if matcher.is_excluded(entry):
113
106
  continue
114
107
 
115
108
  if entry.is_file():
@@ -117,32 +110,107 @@ class Processor:
117
110
  elif entry.is_dir():
118
111
  directories.append(Path(local_dir) / entry.name)
119
112
 
113
+ # make page act as parent node
114
+ parent_doc: Optional[Path] = None
115
+ if (Path(local_dir) / "index.md") in files:
116
+ parent_doc = Path(local_dir) / "index.md"
117
+ elif (Path(local_dir) / "README.md") in files:
118
+ parent_doc = Path(local_dir) / "README.md"
119
+ elif (Path(local_dir) / f"{local_dir.name}.md") in files:
120
+ parent_doc = Path(local_dir) / f"{local_dir.name}.md"
121
+
122
+ if parent_doc is None and self.options.keep_hierarchy:
123
+ parent_doc = Path(local_dir) / "index.md"
124
+
125
+ # create a blank page for directory entry
126
+ with open(parent_doc, "w"):
127
+ pass
128
+
129
+ if parent_doc is not None:
130
+ if parent_doc in files:
131
+ files.remove(parent_doc)
132
+
133
+ # use latest parent as parent for index page
134
+ metadata = self._get_or_create_page(parent_doc, parent_id)
135
+ LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
136
+ self.page_metadata[parent_doc] = metadata
137
+
138
+ # assign new index page as new parent
139
+ parent_id = ConfluencePageID(metadata.page_id)
140
+
120
141
  for doc in files:
121
- metadata = self._get_page(doc)
122
- LOGGER.debug("Indexed %s with metadata: %s", doc, metadata)
123
- page_metadata[doc] = metadata
142
+ self._index_page(doc, parent_id)
124
143
 
125
144
  for directory in directories:
126
- self._index_directory(directory, page_metadata)
127
-
128
- def _get_page(self, absolute_path: Path) -> ConfluencePageMetadata:
129
- "Extracts metadata from a Markdown file."
130
-
131
- with open(absolute_path, "r", encoding="utf-8") as f:
132
- document = f.read()
133
-
134
- qualified_id, document = extract_qualified_id(document)
135
- if qualified_id is None:
136
- if self.options.root_page_id is not None:
137
- hash = hashlib.md5(document.encode("utf-8"))
138
- digest = "".join(f"{c:x}" for c in hash.digest())
139
- LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
140
- qualified_id = ConfluenceQualifiedID(digest)
141
- else:
142
- raise ArgumentError("required: page ID for local output")
143
-
144
- return ConfluencePageMetadata(
145
- page_id=qualified_id.page_id,
146
- space_key=qualified_id.space_key,
147
- title="",
148
- )
145
+ self._index_directory(directory, parent_id)
146
+
147
def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
    """
    Indexes a single Markdown file.

    Obtains metadata for the page and records it in the page metadata index under the file path.

    :param path: Path to the Markdown file.
    :param parent_id: Identifier of the parent page, if any.
    """

    page_meta = self._get_or_create_page(path, parent_id)
    LOGGER.debug("Indexed %s with metadata: %s", path, page_meta)
    self.page_metadata[path] = page_meta
155
+
156
+
157
class ProcessorFactory:
    """
    Base class for factories that create `Processor` instances bound to a root directory.
    """

    options: ConfluenceDocumentOptions
    site: ConfluenceSiteMetadata

    def __init__(
        self, options: ConfluenceDocumentOptions, site: ConfluenceSiteMetadata
    ) -> None:
        """
        Initializes a new factory instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        """

        self.options = options
        self.site = site

    @abstractmethod
    def create(self, root_dir: Path) -> Processor:
        """
        Creates a processor that treats the specified directory as topmost root node.

        Must be overridden by derived classes.

        :param root_dir: File system directory that acts as topmost root node.
        :returns: A new processor instance.
        :raises NotImplementedError: If a derived class has not overridden this method.
        """

        # this class does not use `ABCMeta`, so `@abstractmethod` is not enforced at instantiation;
        # fail loudly instead of silently returning `None`
        raise NotImplementedError("derived classes must implement `create`")
169
+
170
+
171
class Converter:
    """
    Dispatches conversion requests to processors created by a factory.
    """

    factory: ProcessorFactory

    def __init__(self, factory: ProcessorFactory) -> None:
        """
        Initializes a new converter instance.

        :param factory: Factory used to create a processor for each request.
        """

        self.factory = factory

    def process(self, path: Path) -> None:
        """
        Processes a single Markdown file or a directory of Markdown files.

        :param path: Path to a Markdown file or a directory; must exist.
        :raises ArgumentError: If the path is neither a directory nor a regular file.
        """

        path = path.resolve(True)
        if path.is_dir():
            self.process_directory(path)
            return
        if path.is_file():
            self.process_page(path)
            return

        raise ArgumentError(f"expected: valid file or directory path; got: {path}")

    def process_directory(
        self, local_dir: Path, root_dir: Optional[Path] = None
    ) -> None:
        """
        Recursively scans a directory hierarchy for Markdown files, and processes each, resolving cross-references.

        :param local_dir: Directory to process; must exist.
        :param root_dir: Directory that acts as topmost root node; defaults to the directory being processed.
        """

        local_dir = local_dir.resolve(True)
        root_dir = local_dir if root_dir is None else root_dir.resolve(True)

        processor = self.factory.create(root_dir)
        processor.process_directory(local_dir)

    def process_page(self, path: Path, root_dir: Optional[Path] = None) -> None:
        """
        Processes a single Markdown file.

        :param path: Markdown file to process; must exist.
        :param root_dir: Directory that acts as topmost root node; defaults to the parent directory of the file.
        """

        path = path.resolve(True)
        root_dir = path.parent if root_dir is None else root_dir.resolve(True)

        processor = self.factory.create(root_dir)
        processor.process_page(path)