markdown-to-confluence 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/processor.py CHANGED
@@ -6,20 +6,68 @@ Copyright 2022-2025, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
+ import hashlib
9
10
  import logging
10
11
  import os
11
12
  from abc import abstractmethod
12
13
  from pathlib import Path
13
- from typing import Optional
14
+ from typing import Iterable, Optional
14
15
 
16
+ from .collection import ConfluencePageCollection
15
17
  from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
16
18
  from .matcher import Matcher, MatcherOptions
17
- from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
19
+ from .metadata import ConfluenceSiteMetadata
18
20
  from .properties import ArgumentError
21
+ from .scanner import Scanner
19
22
 
20
23
  LOGGER = logging.getLogger(__name__)
21
24
 
22
25
 
26
+ class DocumentNode:
27
+ absolute_path: Path
28
+ page_id: Optional[str]
29
+ space_key: Optional[str]
30
+ title: Optional[str]
31
+
32
+ _children: list["DocumentNode"]
33
+
34
+ def __init__(
35
+ self,
36
+ absolute_path: Path,
37
+ page_id: Optional[str],
38
+ space_key: Optional[str] = None,
39
+ title: Optional[str] = None,
40
+ ):
41
+ self.absolute_path = absolute_path
42
+ self.page_id = page_id
43
+ self.space_key = space_key
44
+ self.title = title
45
+ self._children = []
46
+
47
+ def count(self) -> int:
48
+ c = len(self._children)
49
+ for child in self._children:
50
+ c += child.count()
51
+ return c
52
+
53
+ def add_child(self, child: "DocumentNode") -> None:
54
+ self._children.append(child)
55
+
56
+ def children(self) -> Iterable["DocumentNode"]:
57
+ for child in self._children:
58
+ yield child
59
+
60
+ def descendants(self) -> Iterable["DocumentNode"]:
61
+ for child in self._children:
62
+ yield child
63
+ yield from child.descendants()
64
+
65
+ def all(self) -> Iterable["DocumentNode"]:
66
+ yield self
67
+ for child in self._children:
68
+ yield from child.all()
69
+
70
+
23
71
  class Processor:
24
72
  """
25
73
  Processes a single Markdown page or a directory of Markdown pages.
@@ -29,7 +77,7 @@ class Processor:
29
77
  site: ConfluenceSiteMetadata
30
78
  root_dir: Path
31
79
 
32
- page_metadata: dict[Path, ConfluencePageMetadata]
80
+ page_metadata: ConfluencePageCollection
33
81
 
34
82
  def __init__(
35
83
  self,
@@ -40,8 +88,7 @@ class Processor:
40
88
  self.options = options
41
89
  self.site = site
42
90
  self.root_dir = root_dir
43
-
44
- self.page_metadata = {}
91
+ self.page_metadata = ConfluencePageCollection()
45
92
 
46
93
  def process_directory(self, local_dir: Path) -> None:
47
94
  """
@@ -51,13 +98,16 @@ class Processor:
51
98
  local_dir = local_dir.resolve(True)
52
99
  LOGGER.info("Processing directory: %s", local_dir)
53
100
 
54
- # Step 1: build index of all page metadata
55
- self._index_directory(local_dir, self.options.root_page_id)
56
- LOGGER.info("Indexed %d page(s)", len(self.page_metadata))
101
+ # Step 1: build index of all Markdown files in directory hierarchy
102
+ root = self._index_directory(local_dir, None)
103
+ LOGGER.info("Indexed %d document(s)", root.count())
104
+
105
+ # Step 2: synchronize directory tree structure with page hierarchy in space
106
+ self._synchronize_tree(root, self.options.root_page_id)
57
107
 
58
- # Step 2: convert each page
59
- for page_path in self.page_metadata.keys():
60
- self._process_page(page_path)
108
+ # Step 3: synchronize files in directory hierarchy with pages in space
109
+ for path, metadata in self.page_metadata.items():
110
+ self._synchronize_page(path, ConfluencePageID(metadata.page_id))
61
111
 
62
112
  def process_page(self, path: Path) -> None:
63
113
  """
@@ -65,32 +115,52 @@ class Processor:
65
115
  """
66
116
 
67
117
  LOGGER.info("Processing page: %s", path)
68
- self._index_page(path, self.options.root_page_id)
69
- self._process_page(path)
70
118
 
71
- def _process_page(self, path: Path) -> None:
119
+ # Step 1: parse Markdown file
120
+ root = self._index_file(path)
121
+
122
+ # Step 2: find matching page in Confluence
123
+ self._synchronize_tree(root, self.options.root_page_id)
124
+
125
+ # Step 3: synchronize document with page in space
126
+ for path, metadata in self.page_metadata.items():
127
+ self._synchronize_page(path, ConfluencePageID(metadata.page_id))
128
+
129
+ def _synchronize_page(self, path: Path, page_id: ConfluencePageID) -> None:
130
+ """
131
+ Synchronizes a single Markdown document with its corresponding Confluence page.
132
+ """
133
+
72
134
  page_id, document = ConfluenceDocument.create(
73
135
  path, self.options, self.root_dir, self.site, self.page_metadata
74
136
  )
75
- self._save_document(page_id, document, path)
137
+ self._update_page(page_id, document, path)
76
138
 
77
139
  @abstractmethod
78
- def _get_or_create_page(
79
- self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
80
- ) -> ConfluencePageMetadata:
140
+ def _synchronize_tree(
141
+ self, node: DocumentNode, page_id: Optional[ConfluencePageID]
142
+ ) -> None:
81
143
  """
82
- Creates a new Confluence page if no page is linked in the Markdown document.
144
+ Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
145
+
146
+ Creates new Confluence pages as necessary, e.g. if no page is linked in the Markdown document, or no page is found with lookup by page title.
147
+
148
+ May update the original Markdown document to add tags to associate the document with its corresponding Confluence page.
83
149
  """
84
150
  ...
85
151
 
86
152
  @abstractmethod
87
- def _save_document(
153
+ def _update_page(
88
154
  self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
89
- ) -> None: ...
155
+ ) -> None:
156
+ """
157
+ Saves the document as Confluence Storage Format XHTML.
158
+ """
159
+ ...
90
160
 
91
161
  def _index_directory(
92
- self, local_dir: Path, parent_id: Optional[ConfluencePageID]
93
- ) -> None:
162
+ self, local_dir: Path, parent: Optional[DocumentNode]
163
+ ) -> DocumentNode:
94
164
  """
95
165
  Indexes Markdown files in a directory hierarchy recursively.
96
166
  """
@@ -130,28 +200,54 @@ class Processor:
130
200
  if parent_doc in files:
131
201
  files.remove(parent_doc)
132
202
 
133
- # use latest parent as parent for index page
134
- metadata = self._get_or_create_page(parent_doc, parent_id)
135
- LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
136
- self.page_metadata[parent_doc] = metadata
137
-
138
- # assign new index page as new parent
139
- parent_id = ConfluencePageID(metadata.page_id)
140
-
141
- for doc in files:
142
- self._index_page(doc, parent_id)
203
+ # promote Markdown document in directory as parent page in Confluence
204
+ node = self._index_file(parent_doc)
205
+ if parent is not None:
206
+ parent.add_child(node)
207
+ parent = node
208
+ elif parent is None:
209
+ # create new top-level node
210
+ if self.options.root_page_id is not None:
211
+ page_id = self.options.root_page_id.page_id
212
+ parent = DocumentNode(local_dir, page_id=page_id)
213
+ else:
214
+ # local use only, raises error with remote synchronization
215
+ parent = DocumentNode(local_dir, page_id=None)
216
+
217
+ for file in files:
218
+ node = self._index_file(file)
219
+ parent.add_child(node)
143
220
 
144
221
  for directory in directories:
145
- self._index_directory(directory, parent_id)
222
+ self._index_directory(directory, parent)
146
223
 
147
- def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
224
+ return parent
225
+
226
+ def _index_file(self, path: Path) -> DocumentNode:
148
227
  """
149
228
  Indexes a single Markdown file.
150
229
  """
151
230
 
152
- metadata = self._get_or_create_page(path, parent_id)
153
- LOGGER.debug("Indexed %s with metadata: %s", path, metadata)
154
- self.page_metadata[path] = metadata
231
+ LOGGER.info("Indexing file: %s", path)
232
+
233
+ # extract information from a Markdown document found in a local directory.
234
+ document = Scanner().read(path)
235
+
236
+ return DocumentNode(
237
+ absolute_path=path,
238
+ page_id=document.page_id,
239
+ space_key=document.space_key,
240
+ title=document.title,
241
+ )
242
+
243
+ def _generate_hash(self, absolute_path: Path) -> str:
244
+ """
245
+ Computes a digest to be used as a unique string.
246
+ """
247
+
248
+ relative_path = absolute_path.relative_to(self.root_dir)
249
+ hash = hashlib.md5(relative_path.as_posix().encode("utf-8"))
250
+ return "".join(f"{c:x}" for c in hash.digest())
155
251
 
156
252
 
157
253
  class ProcessorFactory:
md2conf/properties.py CHANGED
@@ -54,15 +54,28 @@ class ConfluenceSiteProperties:
54
54
  self.space_key = opt_space_key
55
55
 
56
56
 
57
- class ConfluenceConnectionProperties(ConfluenceSiteProperties):
58
- "Properties related to connecting to Confluence."
59
-
57
+ class ConfluenceConnectionProperties:
58
+ """
59
+ Properties related to connecting to Confluence.
60
+
61
+ :param api_url: Confluence API URL. Required for scoped tokens.
62
+ :param user_name: Confluence user name.
63
+ :param api_key: Confluence API key.
64
+ :param headers: Additional HTTP headers to pass to Confluence REST API calls.
65
+ """
66
+
67
+ domain: Optional[str]
68
+ base_path: Optional[str]
69
+ space_key: Optional[str]
70
+ api_url: Optional[str]
60
71
  user_name: Optional[str]
61
72
  api_key: str
62
73
  headers: Optional[dict[str, str]]
63
74
 
64
75
  def __init__(
65
76
  self,
77
+ *,
78
+ api_url: Optional[str] = None,
66
79
  domain: Optional[str] = None,
67
80
  base_path: Optional[str] = None,
68
81
  user_name: Optional[str] = None,
@@ -70,14 +83,20 @@ class ConfluenceConnectionProperties(ConfluenceSiteProperties):
70
83
  space_key: Optional[str] = None,
71
84
  headers: Optional[dict[str, str]] = None,
72
85
  ) -> None:
73
- super().__init__(domain, base_path, space_key)
74
-
86
+ opt_api_url = api_url or os.getenv("CONFLUENCE_API_URL")
87
+ opt_domain = domain or os.getenv("CONFLUENCE_DOMAIN")
88
+ opt_base_path = base_path or os.getenv("CONFLUENCE_PATH")
89
+ opt_space_key = space_key or os.getenv("CONFLUENCE_SPACE_KEY")
75
90
  opt_user_name = user_name or os.getenv("CONFLUENCE_USER_NAME")
76
91
  opt_api_key = api_key or os.getenv("CONFLUENCE_API_KEY")
77
92
 
78
93
  if not opt_api_key:
79
94
  raise ArgumentError("Confluence API key not specified")
80
95
 
96
+ self.api_url = opt_api_url
97
+ self.domain = opt_domain
98
+ self.base_path = opt_base_path
99
+ self.space_key = opt_space_key
81
100
  self.user_name = opt_user_name
82
101
  self.api_key = opt_api_key
83
102
  self.headers = headers
md2conf/scanner.py CHANGED
@@ -9,15 +9,26 @@ Copyright 2022-2025, Levente Hunyadi
9
9
  import re
10
10
  from dataclasses import dataclass
11
11
  from pathlib import Path
12
- from typing import Any, Optional
12
+ from typing import Any, Optional, TypeVar
13
13
 
14
14
  import yaml
15
+ from strong_typing.core import JsonType
16
+ from strong_typing.serialization import DeserializerOptions, json_to_object
17
+
18
+ T = TypeVar("T")
19
+
20
+
21
+ def _json_to_object(
22
+ typ: type[T],
23
+ data: JsonType,
24
+ ) -> T:
25
+ return json_to_object(typ, data, options=DeserializerOptions(skip_unassigned=True))
15
26
 
16
27
 
17
28
  def extract_value(pattern: str, text: str) -> tuple[Optional[str], str]:
18
29
  values: list[str] = []
19
30
 
20
- def _repl_func(matchobj: re.Match) -> str:
31
+ def _repl_func(matchobj: re.Match[str]) -> str:
21
32
  values.append(matchobj.group(1))
22
33
  return ""
23
34
 
@@ -46,16 +57,27 @@ def extract_frontmatter_properties(text: str) -> tuple[Optional[dict[str, Any]],
46
57
  return properties, text
47
58
 
48
59
 
49
- def get_string(properties: dict[str, Any], key: str) -> Optional[str]:
50
- value = properties.get(key)
51
- if value is None:
52
- return None
53
- elif not isinstance(value, str):
54
- raise ValueError(
55
- f"expected dictionary value type of `str` for key `{key}`; got value of type `{type(value).__name__}`"
56
- )
57
- else:
58
- return value
60
+ @dataclass
61
+ class DocumentProperties:
62
+ """
63
+ An object that holds properties extracted from the front-matter of a Markdown document.
64
+
65
+ :param page_id: Confluence page ID.
66
+ :param space_key: Confluence space key.
67
+ :param confluence_page_id: Confluence page ID. (Alternative name for JSON de-serialization.)
68
+ :param confluence_space_key: Confluence space key. (Alternative name for JSON de-serialization.)
69
+ :param generated_by: Text identifying the tool that generated the document.
70
+ :param title: The title extracted from front-matter.
71
+ :param tags: A list of tags (content labels) extracted from front-matter.
72
+ """
73
+
74
+ page_id: Optional[str]
75
+ space_key: Optional[str]
76
+ confluence_page_id: Optional[str]
77
+ confluence_space_key: Optional[str]
78
+ generated_by: Optional[str]
79
+ title: Optional[str]
80
+ tags: Optional[list[str]]
59
81
 
60
82
 
61
83
  @dataclass
@@ -67,6 +89,7 @@ class ScannedDocument:
67
89
  :param space_key: Confluence space key.
68
90
  :param generated_by: Text identifying the tool that generated the document.
69
91
  :param title: The title extracted from front-matter.
92
+ :param tags: A list of tags (content labels) extracted from front-matter.
70
93
  :param text: Text that remains after front-matter and inline properties have been extracted.
71
94
  """
72
95
 
@@ -74,6 +97,7 @@ class ScannedDocument:
74
97
  space_key: Optional[str]
75
98
  generated_by: Optional[str]
76
99
  title: Optional[str]
100
+ tags: Optional[list[str]]
77
101
  text: str
78
102
 
79
103
 
@@ -88,30 +112,38 @@ class Scanner:
88
112
  text = f.read()
89
113
 
90
114
  # extract Confluence page ID
91
- page_id, text = extract_value(r"<!--\s+confluence-page-id:\s*(\d+)\s+-->", text)
115
+ page_id, text = extract_value(
116
+ r"<!--\s+confluence[-_]page[-_]id:\s*(\d+)\s+-->", text
117
+ )
92
118
 
93
119
  # extract Confluence space key
94
120
  space_key, text = extract_value(
95
- r"<!--\s+confluence-space-key:\s*(\S+)\s+-->", text
121
+ r"<!--\s+confluence[-_]space[-_]key:\s*(\S+)\s+-->", text
96
122
  )
97
123
 
98
124
  # extract 'generated-by' tag text
99
- generated_by, text = extract_value(r"<!--\s+generated-by:\s*(.*)\s+-->", text)
125
+ generated_by, text = extract_value(
126
+ r"<!--\s+generated[-_]by:\s*(.*)\s+-->", text
127
+ )
100
128
 
101
129
  title: Optional[str] = None
130
+ tags: Optional[list[str]] = None
102
131
 
103
132
  # extract front-matter
104
- properties, text = extract_frontmatter_properties(text)
105
- if properties is not None:
106
- page_id = page_id or get_string(properties, "confluence-page-id")
107
- space_key = space_key or get_string(properties, "confluence-space-key")
108
- generated_by = generated_by or get_string(properties, "generated-by")
109
- title = get_string(properties, "title")
133
+ data, text = extract_frontmatter_properties(text)
134
+ if data is not None:
135
+ p = _json_to_object(DocumentProperties, data)
136
+ page_id = page_id or p.confluence_page_id or p.page_id
137
+ space_key = space_key or p.confluence_space_key or p.space_key
138
+ generated_by = generated_by or p.generated_by
139
+ title = p.title
140
+ tags = p.tags
110
141
 
111
142
  return ScannedDocument(
112
143
  page_id=page_id,
113
144
  space_key=space_key,
114
145
  generated_by=generated_by,
115
146
  title=title,
147
+ tags=tags,
116
148
  text=text,
117
149
  )
@@ -1,23 +0,0 @@
1
- markdown_to_confluence-0.3.5.dist-info/licenses/LICENSE,sha256=Pv43so2bPfmKhmsrmXFyAvS7M30-1i1tzjz6-dfhyOo,1077
2
- md2conf/__init__.py,sha256=Uaqb3maQScpYs3FiH8kuM6pUh5JzE4Vy52MgU9pvMTw,402
3
- md2conf/__main__.py,sha256=bFcfmSnTWeuhmDm7bJ3jJabZ2S8W9biuAP6_R-Cc9As,8034
4
- md2conf/api.py,sha256=VxrAJ4yCsdGFVAEQQWw5aONwsMz0b6KvN4EMLXCKOwE,26905
5
- md2conf/application.py,sha256=SIM4yLHaLnvG7wRJLbRvptrkc0q4JMuAhDnanqsuYzA,6697
6
- md2conf/converter.py,sha256=ASXhs7g79dOU4x1QhfvKL8mtwth508GTGcb3AUHigC4,37286
7
- md2conf/emoji.py,sha256=48QJtOD0F3Be1laYLvAOwe0GxrJS-vcfjtCdiBsNcAc,1960
8
- md2conf/entities.dtd,sha256=M6NzqL5N7dPs_eUA_6sDsiSLzDaAacrx9LdttiufvYU,30215
9
- md2conf/local.py,sha256=998bBRpDAOywA-L0KD4_VyuL2Xftflv0ler-uNPQZn4,3866
10
- md2conf/matcher.py,sha256=y5WEZNklTpUoJtMJlulTvfhl_v-UMU6wySJAKit91ig,4940
11
- md2conf/mermaid.py,sha256=ZETocFDKi_fSYyVR1pJ7fo207YYFSuT44MSYFQ8-cZ0,2562
12
- md2conf/metadata.py,sha256=Xozg2PjJnis7VQYQT_edIvTb8u0cs_ZizPOAxc1N8vg,1003
13
- md2conf/processor.py,sha256=jSLFy8hqZJXf3b79jp31Fn9-cm4j9xq4HDChp9pyhP0,6706
14
- md2conf/properties.py,sha256=TOCXLdTfYkKjRwZaMgvXw0mNCI4opEUwpBXro2Kv2B4,2467
15
- md2conf/puppeteer-config.json,sha256=-dMTAN_7kNTGbDlfXzApl0KJpAWna9YKZdwMKbpOb60,159
16
- md2conf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- md2conf/scanner.py,sha256=iF8NCQAFO6Yut5aAQr7uxfWzVMMt9j3T5ADoVVSJWKQ,3543
18
- markdown_to_confluence-0.3.5.dist-info/METADATA,sha256=NiXwBXtQ5WhHce_JX7TBUSefQSR5jk5fERe46BL4vwE,18462
19
- markdown_to_confluence-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
- markdown_to_confluence-0.3.5.dist-info/entry_points.txt,sha256=F1zxa1wtEObtbHS-qp46330WVFLHdMnV2wQ-ZorRmX0,50
21
- markdown_to_confluence-0.3.5.dist-info/top_level.txt,sha256=_FJfl_kHrHNidyjUOuS01ngu_jDsfc-ZjSocNRJnTzU,8
22
- markdown_to_confluence-0.3.5.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
- markdown_to_confluence-0.3.5.dist-info/RECORD,,