markdown-to-confluence 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/local.py CHANGED
@@ -6,22 +6,15 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
 
-import hashlib
 import logging
 import os
 from pathlib import Path
 from typing import Optional
 
-from .converter import (
-    ConfluenceDocument,
-    ConfluenceDocumentOptions,
-    ConfluencePageID,
-    ConfluenceQualifiedID,
-    extract_qualified_id,
-)
+from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
+from .extra import override
 from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
-from .processor import Converter, Processor, ProcessorFactory
-from .properties import PageError
+from .processor import Converter, DocumentNode, Processor, ProcessorFactory
 
 LOGGER = logging.getLogger(__name__)
 
@@ -51,46 +44,41 @@ class LocalProcessor(Processor):
         super().__init__(options, site, root_dir)
         self.out_dir = out_dir or root_dir
 
-    def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        parent_id: Optional[ConfluencePageID],
-        *,
-        title: Optional[str] = None,
-    ) -> ConfluencePageMetadata:
-        """
-        Extracts metadata from a Markdown file.
+    @override
+    def _synchronize_tree(
+        self, root: DocumentNode, root_id: Optional[ConfluencePageID]
+    ) -> None:
         """
+        Creates the cross-reference index.
 
-        # parse file
-        with open(absolute_path, "r", encoding="utf-8") as f:
-            text = f.read()
-
-        qualified_id, text = extract_qualified_id(text)
+        Does not change Markdown files.
+        """
 
-        if qualified_id is None:
-            if parent_id is None:
-                raise PageError(
-                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
+        for node in root.all():
+            if node.page_id is not None:
+                page_id = node.page_id
+            else:
+                digest = self._generate_hash(node.absolute_path)
+                LOGGER.info(
+                    "Identifier %s assigned to page: %s", digest, node.absolute_path
                 )
-
-            hash = hashlib.md5(text.encode("utf-8"))
-            digest = "".join(f"{c:x}" for c in hash.digest())
-            LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
-            qualified_id = ConfluenceQualifiedID(digest)
-
-        return ConfluencePageMetadata(
-            page_id=qualified_id.page_id,
-            space_key=qualified_id.space_key,
-            title="",
-            overwrite=True,
-        )
-
-    def _save_document(self, document: ConfluenceDocument, path: Path) -> None:
+                page_id = digest
+
+            self.page_metadata.add(
+                node.absolute_path,
+                ConfluencePageMetadata(
+                    page_id=page_id,
+                    space_key=node.space_key or self.site.space_key or "HOME",
+                    title=node.title or "",
+                ),
+            )
+
+    @override
+    def _update_page(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
         """
-        Saves a new version of a Confluence document.
-
-        A derived class may invoke Confluence REST API to persist the new version.
+        Saves the document as Confluence Storage Format XHTML to the local disk.
         """
 
         content = document.xhtml()
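
Taken together, `LocalProcessor._synchronize_tree` guarantees that every indexed document ends up with a usable page ID and space key even when the Markdown file declares neither: the ID falls back to a path-derived digest, and the space key falls back from the document to the site to the literal `"HOME"`. A standalone sketch of that fallback logic (the function name and sample values below are illustrative, not part of the md2conf API):

```python
from typing import Optional


def resolve_page(
    page_id: Optional[str],
    space_key: Optional[str],
    site_space_key: Optional[str],
    digest: str,
) -> tuple[str, str]:
    # Prefer the page ID embedded in the document; otherwise use the digest
    # that _generate_hash derives from the document's relative path.
    resolved_id = page_id if page_id is not None else digest
    # Space key falls back from document to site to "HOME".
    resolved_space = space_key or site_space_key or "HOME"
    return resolved_id, resolved_space


print(resolve_page(None, None, "DOCS", "1a2b3c"))     # ('1a2b3c', 'DOCS')
print(resolve_page("987654", "ENG", None, "1a2b3c"))  # ('987654', 'ENG')
```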
md2conf/matcher.py CHANGED
@@ -10,15 +10,15 @@ import os.path
 from dataclasses import dataclass
 from fnmatch import fnmatch
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Iterable, Optional, Union, overload
 
 
-@dataclass
+@dataclass(frozen=True)
 class Entry:
     """
     Represents a file or directory entry.
 
-    :param name: Name of the file-system entry.
+    :param name: Name of the file-system entry to match against the rule-set.
     :param is_dir: True if the entry is a directory.
     """
 
@@ -43,6 +43,15 @@ class MatcherOptions:
             self.extension = f".{self.extension}"
 
 
+def _entry_name_dir(entry: Union[Entry, os.DirEntry[str]]) -> tuple[str, bool]:
+    if isinstance(entry, Entry):
+        return entry.name, entry.is_dir
+    elif isinstance(entry, os.DirEntry):
+        return entry.name, entry.is_dir()
+    else:
+        raise NotImplementedError("type match not exhaustive")
+
+
 class Matcher:
     "Compares file and directory names against a list of exclude/include patterns."
 
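
The helper exists because the two entry types expose the directory flag differently: the frozen `Entry` dataclass carries `is_dir` as a field, while `os.DirEntry` exposes it as a method. A self-contained sketch of the same normalization (the `Entry` class is re-declared here only so the snippet runs on its own):

```python
import os
from dataclasses import dataclass
from typing import Union


@dataclass(frozen=True)
class Entry:
    name: str
    is_dir: bool


def entry_name_dir(entry: Union[Entry, os.DirEntry]) -> tuple[str, bool]:
    # Entry stores is_dir as an attribute; os.DirEntry computes it when called.
    if isinstance(entry, Entry):
        return entry.name, entry.is_dir
    return entry.name, entry.is_dir()


print(entry_name_dir(Entry("notes.md", False)))
for dir_entry in os.scandir("."):
    print(entry_name_dir(dir_entry))
```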
@@ -58,20 +67,40 @@ class Matcher:
         else:
             self.rules = []
 
+        for rule in self.rules:
+            if "/" in rule or os.path.sep in rule:
+                raise ValueError(f"nested matching not supported: {rule}")
+
     def extension_matches(self, name: str) -> bool:
         "True if the file name has the expected extension."
 
         return self.options.extension is None or name.endswith(self.options.extension)
 
-    def is_excluded(self, name: str, is_dir: bool) -> bool:
+    @overload
+    def is_excluded(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches any of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name matches at least one of the exclusion patterns.
+        """
+
+        ...
+
+    @overload
+    def is_excluded(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches any of the exclusion patterns.
 
-        :param name: Name to match against the rule-set.
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name matches at least one of the exclusion patterns.
         """
 
+        ...
+
+    def is_excluded(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        name, is_dir = _entry_name_dir(entry)
+
         # skip hidden files and directories
         if name.startswith("."):
             return True
@@ -86,26 +115,38 @@ class Matcher:
         else:
             return False
 
-    def is_included(self, name: str, is_dir: bool) -> bool:
+    @overload
+    def is_included(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches none of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name doesn't match any of the exclusion patterns.
+        """
+        ...
+
+    @overload
+    def is_included(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches none of the exclusion patterns.
 
-        :param name: Name to match against the rule-set.
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name doesn't match any of the exclusion patterns.
         """
+        ...
 
-        return not self.is_excluded(name, is_dir)
+    def is_included(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        return not self.is_excluded(entry)
 
-    def filter(self, items: Iterable[Entry]) -> list[Entry]:
+    def filter(self, entries: Iterable[Entry]) -> list[Entry]:
         """
         Returns only those elements from the input that don't match any of the exclusion rules.
 
-        :param items: A list of names to filter.
+        :param entries: A list of names to filter.
         :returns: A filtered list of names that didn't match any of the exclusion rules.
         """
 
-        return [item for item in items if self.is_included(item.name, item.is_dir)]
+        return [entry for entry in entries if self.is_included(entry)]
 
     def scandir(self, path: Path) -> list[Entry]:
         """
md2conf/mermaid.py CHANGED
@@ -79,10 +79,16 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") ->
     )
     stdout, stderr = proc.communicate(input=source.encode("utf-8"))
     if proc.returncode:
-        raise RuntimeError(
-            f"failed to convert Mermaid diagram; exit code: {proc.returncode}, "
-            f"output:\n{stdout.decode('utf-8')}\n{stderr.decode('utf-8')}"
-        )
+        messages = [
+            f"failed to convert Mermaid diagram; exit code: {proc.returncode}"
+        ]
+        console_output = stdout.decode("utf-8")
+        if console_output:
+            messages.append(f"output:\n{console_output}")
+        console_error = stderr.decode("utf-8")
+        if console_error:
+            messages.append(f"error:\n{console_error}")
+        raise RuntimeError("\n".join(messages))
     with open(filename, "rb") as image:
         return image.read()
 
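
The rewritten error path reports only the streams that actually contain text instead of always interpolating both. A runnable sketch of the same message assembly, substituting a short Python child process for the Mermaid CLI:

```python
import subprocess
import sys

# Stand-in for the Mermaid CLI: a child process that fails with text on stderr only.
proc = subprocess.Popen(
    [sys.executable, "-c", "import sys; sys.stderr.write('syntax error'); sys.exit(2)"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
stdout, stderr = proc.communicate(input=b"graph TD; A-->B")
if proc.returncode:
    messages = [f"failed to convert diagram; exit code: {proc.returncode}"]
    if stdout:
        messages.append(f"output:\n{stdout.decode('utf-8')}")
    if stderr:
        messages.append(f"error:\n{stderr.decode('utf-8')}")
    print("\n".join(messages))  # render_diagram raises RuntimeError with this text
```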
md2conf/metadata.py CHANGED
@@ -33,10 +33,8 @@ class ConfluencePageMetadata:
     :param page_id: Confluence page ID.
     :param space_key: Confluence space key.
     :param title: Document title.
-    :param overwrite: True if operations are allowed to update document properties (e.g. title).
     """
 
     page_id: str
-    space_key: Optional[str]
+    space_key: str
    title: str
-    overwrite: bool
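
With `overwrite` dropped and `space_key` narrowed to `str`, every metadata record now carries a concrete space key (the local processor fills it with `node.space_key or self.site.space_key or "HOME"`). An illustrative construction with placeholder values:

```python
from md2conf.metadata import ConfluencePageMetadata

# Placeholder values; in 0.4.0 all three fields are plain strings.
metadata = ConfluencePageMetadata(page_id="123456", space_key="DOCS", title="Setup guide")
print(metadata)
```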
md2conf/processor.py CHANGED
@@ -6,20 +6,68 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
 
+import hashlib
 import logging
 import os
 from abc import abstractmethod
 from pathlib import Path
-from typing import Optional
+from typing import Iterable, Optional
 
+from .collection import ConfluencePageCollection
 from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
 from .matcher import Matcher, MatcherOptions
-from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
+from .metadata import ConfluenceSiteMetadata
 from .properties import ArgumentError
+from .scanner import Scanner
 
 LOGGER = logging.getLogger(__name__)
 
 
+class DocumentNode:
+    absolute_path: Path
+    page_id: Optional[str]
+    space_key: Optional[str]
+    title: Optional[str]
+
+    _children: list["DocumentNode"]
+
+    def __init__(
+        self,
+        absolute_path: Path,
+        page_id: Optional[str],
+        space_key: Optional[str] = None,
+        title: Optional[str] = None,
+    ):
+        self.absolute_path = absolute_path
+        self.page_id = page_id
+        self.space_key = space_key
+        self.title = title
+        self._children = []
+
+    def count(self) -> int:
+        c = len(self._children)
+        for child in self._children:
+            c += child.count()
+        return c
+
+    def add_child(self, child: "DocumentNode") -> None:
+        self._children.append(child)
+
+    def children(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+
+    def descendants(self) -> Iterable["DocumentNode"]:
+        for child in self._children:
+            yield child
+            yield from child.descendants()
+
+    def all(self) -> Iterable["DocumentNode"]:
+        yield self
+        for child in self._children:
+            yield from child.all()
+
+
 class Processor:
     """
     Processes a single Markdown page or a directory of Markdown pages.
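
`DocumentNode` is a plain tree whose traversal helpers differ slightly: `count()` tallies descendants only, while `all()` also yields the node itself. A small sketch of building and walking such a tree (paths, titles and page IDs below are made up):

```python
from pathlib import Path

from md2conf.processor import DocumentNode  # introduced in 0.4.0

root = DocumentNode(Path("docs/index.md"), page_id="100001", title="Home")
root.add_child(DocumentNode(Path("docs/setup.md"), page_id=None, title="Setup"))
root.add_child(DocumentNode(Path("docs/usage.md"), page_id="100002", title="Usage"))

print(root.count())                           # 2 -- descendants only
print([n.title for n in root.all()])          # ['Home', 'Setup', 'Usage']
print([n.title for n in root.descendants()])  # ['Setup', 'Usage']
```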
@@ -29,7 +77,7 @@ class Processor:
     site: ConfluenceSiteMetadata
     root_dir: Path
 
-    page_metadata: dict[Path, ConfluencePageMetadata]
+    page_metadata: ConfluencePageCollection
 
     def __init__(
         self,
@@ -40,8 +88,7 @@ class Processor:
         self.options = options
         self.site = site
         self.root_dir = root_dir
-
-        self.page_metadata = {}
+        self.page_metadata = ConfluencePageCollection()
 
     def process_directory(self, local_dir: Path) -> None:
         """
@@ -51,13 +98,16 @@ class Processor:
         local_dir = local_dir.resolve(True)
         LOGGER.info("Processing directory: %s", local_dir)
 
-        # Step 1: build index of all page metadata
-        self._index_directory(local_dir, self.options.root_page_id)
-        LOGGER.info("Indexed %d page(s)", len(self.page_metadata))
+        # Step 1: build index of all Markdown files in directory hierarchy
+        root = self._index_directory(local_dir, None)
+        LOGGER.info("Indexed %d document(s)", root.count())
 
-        # Step 2: convert each page
-        for page_path in self.page_metadata.keys():
-            self._process_page(page_path)
+        # Step 2: synchronize directory tree structure with page hierarchy in space
+        self._synchronize_tree(root, self.options.root_page_id)
+
+        # Step 3: synchronize files in directory hierarchy with pages in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
 
     def process_page(self, path: Path) -> None:
         """
@@ -65,34 +115,52 @@ class Processor:
         """
 
         LOGGER.info("Processing page: %s", path)
-        self._index_page(path, self.options.root_page_id)
-        self._process_page(path)
 
-    def _process_page(self, path: Path) -> None:
-        document = ConfluenceDocument.create(
+        # Step 1: parse Markdown file
+        root = self._index_file(path)
+
+        # Step 2: find matching page in Confluence
+        self._synchronize_tree(root, self.options.root_page_id)
+
+        # Step 3: synchronize document with page in space
+        for path, metadata in self.page_metadata.items():
+            self._synchronize_page(path, ConfluencePageID(metadata.page_id))
+
+    def _synchronize_page(self, path: Path, page_id: ConfluencePageID) -> None:
+        """
+        Synchronizes a single Markdown document with its corresponding Confluence page.
+        """
+
+        page_id, document = ConfluenceDocument.create(
             path, self.options, self.root_dir, self.site, self.page_metadata
         )
-        self._save_document(document, path)
+        self._update_page(page_id, document, path)
 
     @abstractmethod
-    def _get_or_create_page(
-        self,
-        absolute_path: Path,
-        parent_id: Optional[ConfluencePageID],
-        *,
-        title: Optional[str] = None,
-    ) -> ConfluencePageMetadata:
+    def _synchronize_tree(
+        self, node: DocumentNode, page_id: Optional[ConfluencePageID]
+    ) -> None:
         """
-        Creates a new Confluence page if no page is linked in the Markdown document.
+        Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
+
+        Creates new Confluence pages as necessary, e.g. if no page is linked in the Markdown document, or no page is found with lookup by page title.
+
+        May update the original Markdown document to add tags to associate the document with its corresponding Confluence page.
         """
         ...
 
     @abstractmethod
-    def _save_document(self, document: ConfluenceDocument, path: Path) -> None: ...
+    def _update_page(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None:
+        """
+        Saves the document as Confluence Storage Format XHTML.
+        """
+        ...
 
     def _index_directory(
-        self, local_dir: Path, parent_id: Optional[ConfluencePageID]
-    ) -> None:
+        self, local_dir: Path, parent: Optional[DocumentNode]
+    ) -> DocumentNode:
         """
         Indexes Markdown files in a directory hierarchy recursively.
         """
@@ -104,7 +172,7 @@ class Processor:
         files: list[Path] = []
         directories: list[Path] = []
         for entry in os.scandir(local_dir):
-            if matcher.is_excluded(entry.name, entry.is_dir()):
+            if matcher.is_excluded(entry):
                 continue
 
             if entry.is_file():
@@ -132,28 +200,54 @@ class Processor:
         if parent_doc in files:
             files.remove(parent_doc)
 
-            # use latest parent as parent for index page
-            metadata = self._get_or_create_page(parent_doc, parent_id)
-            LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
-            self.page_metadata[parent_doc] = metadata
-
-            # assign new index page as new parent
-            parent_id = ConfluencePageID(metadata.page_id)
-
-        for doc in files:
-            self._index_page(doc, parent_id)
+            # promote Markdown document in directory as parent page in Confluence
+            node = self._index_file(parent_doc)
+            if parent is not None:
+                parent.add_child(node)
+            parent = node
+        elif parent is None:
+            # create new top-level node
+            if self.options.root_page_id is not None:
+                page_id = self.options.root_page_id.page_id
+                parent = DocumentNode(local_dir, page_id=page_id)
+            else:
+                # local use only, raises error with remote synchronization
+                parent = DocumentNode(local_dir, page_id=None)
+
+        for file in files:
+            node = self._index_file(file)
+            parent.add_child(node)
 
         for directory in directories:
-            self._index_directory(directory, parent_id)
+            self._index_directory(directory, parent)
+
+        return parent
 
-    def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
+    def _index_file(self, path: Path) -> DocumentNode:
         """
         Indexes a single Markdown file.
         """
 
-        metadata = self._get_or_create_page(path, parent_id)
-        LOGGER.debug("Indexed %s with metadata: %s", path, metadata)
-        self.page_metadata[path] = metadata
+        LOGGER.info("Indexing file: %s", path)
+
+        # extract information from a Markdown document found in a local directory.
+        document = Scanner().read(path)
+
+        return DocumentNode(
+            absolute_path=path,
+            page_id=document.page_id,
+            space_key=document.space_key,
+            title=document.title,
+        )
+
+    def _generate_hash(self, absolute_path: Path) -> str:
+        """
+        Computes a digest to be used as a unique string.
+        """
+
+        relative_path = absolute_path.relative_to(self.root_dir)
+        hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
+        return "".join(f"{c:x}" for c in hash.digest())
 
 
 class ProcessorFactory:
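
When a document carries no page ID, the digest produced by `_generate_hash` stands in for one; it depends only on the document's path relative to the content root, so it is stable across runs over the same tree. An equivalent standalone computation (the paths are illustrative):

```python
import hashlib
from pathlib import Path


def generate_hash(absolute_path: Path, root_dir: Path) -> str:
    # Hash the POSIX form of the path relative to the content root and render
    # each byte as lowercase hexadecimal, mirroring Processor._generate_hash.
    relative_path = absolute_path.relative_to(root_dir)
    digest = hashlib.md5(relative_path.as_posix().encode("utf-8"))
    return "".join(f"{c:x}" for c in digest.digest())


print(generate_hash(Path("/work/docs/setup.md"), Path("/work/docs")))
```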
md2conf/properties.py CHANGED
@@ -54,15 +54,28 @@ class ConfluenceSiteProperties:
         self.space_key = opt_space_key
 
 
-class ConfluenceConnectionProperties(ConfluenceSiteProperties):
-    "Properties related to connecting to Confluence."
-
+class ConfluenceConnectionProperties:
+    """
+    Properties related to connecting to Confluence.
+
+    :param api_url: Confluence API URL. Required for scoped tokens.
+    :param user_name: Confluence user name.
+    :param api_key: Confluence API key.
+    :param headers: Additional HTTP headers to pass to Confluence REST API calls.
+    """
+
+    domain: Optional[str]
+    base_path: Optional[str]
+    space_key: Optional[str]
+    api_url: Optional[str]
     user_name: Optional[str]
     api_key: str
     headers: Optional[dict[str, str]]
 
     def __init__(
         self,
+        *,
+        api_url: Optional[str] = None,
         domain: Optional[str] = None,
         base_path: Optional[str] = None,
         user_name: Optional[str] = None,
@@ -70,14 +83,20 @@ class ConfluenceConnectionProperties(ConfluenceSiteProperties):
         space_key: Optional[str] = None,
         headers: Optional[dict[str, str]] = None,
     ) -> None:
-        super().__init__(domain, base_path, space_key)
-
+        opt_api_url = api_url or os.getenv("CONFLUENCE_API_URL")
+        opt_domain = domain or os.getenv("CONFLUENCE_DOMAIN")
+        opt_base_path = base_path or os.getenv("CONFLUENCE_PATH")
+        opt_space_key = space_key or os.getenv("CONFLUENCE_SPACE_KEY")
         opt_user_name = user_name or os.getenv("CONFLUENCE_USER_NAME")
         opt_api_key = api_key or os.getenv("CONFLUENCE_API_KEY")
 
         if not opt_api_key:
             raise ArgumentError("Confluence API key not specified")
 
+        self.api_url = opt_api_url
+        self.domain = opt_domain
+        self.base_path = opt_base_path
+        self.space_key = opt_space_key
         self.user_name = opt_user_name
         self.api_key = opt_api_key
         self.headers = headers
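
`ConfluenceConnectionProperties` now takes keyword-only arguments, stores the site settings directly rather than inheriting them from `ConfluenceSiteProperties`, and resolves each omitted argument from an environment variable. A sketch of configuring it purely via the environment; all values are placeholders, and any validation outside the hunks shown above is not accounted for:

```python
import os

from md2conf.properties import ConfluenceConnectionProperties

# Placeholder values for the environment variables the constructor falls back to.
os.environ["CONFLUENCE_DOMAIN"] = "example.atlassian.net"
os.environ["CONFLUENCE_PATH"] = "/wiki/"
os.environ["CONFLUENCE_SPACE_KEY"] = "DOCS"
os.environ["CONFLUENCE_USER_NAME"] = "docs-bot@example.com"
os.environ["CONFLUENCE_API_KEY"] = "api-token-placeholder"

properties = ConfluenceConnectionProperties()  # arguments are keyword-only in 0.4.0
print(properties.domain, properties.base_path, properties.space_key)
```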