markdown-to-confluence 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/METADATA +24 -11
- markdown_to_confluence-0.3.5.dist-info/RECORD +23 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/WHEEL +1 -1
- md2conf/__init__.py +1 -1
- md2conf/__main__.py +6 -5
- md2conf/api.py +235 -45
- md2conf/application.py +100 -182
- md2conf/converter.py +53 -112
- md2conf/local.py +125 -0
- md2conf/matcher.py +54 -13
- md2conf/mermaid.py +10 -4
- md2conf/metadata.py +42 -0
- md2conf/processor.py +158 -90
- md2conf/scanner.py +117 -0
- markdown_to_confluence-0.3.3.dist-info/RECORD +0 -20
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.3.3.dist-info → markdown_to_confluence-0.3.5.dist-info}/zip-safe +0 -0
md2conf/local.py
ADDED
```python
"""
Publish Markdown files to Confluence wiki.

Copyright 2022-2025, Levente Hunyadi

:see: https://github.com/hunyadi/md2conf
"""

import hashlib
import logging
import os
from pathlib import Path
from typing import Optional

from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
from .processor import Converter, Processor, ProcessorFactory
from .properties import PageError
from .scanner import Scanner

LOGGER = logging.getLogger(__name__)


class LocalProcessor(Processor):
    """
    Transforms a single Markdown page or a directory of Markdown pages into Confluence Storage Format (CSF) documents.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        *,
        out_dir: Optional[Path],
        root_dir: Path,
    ) -> None:
        """
        Initializes a new processor instance.

        :param options: Options that control the generated page content.
        :param site: Data associated with a Confluence wiki site.
        :param out_dir: File system directory to write generated CSF documents to.
        :param root_dir: File system directory that acts as topmost root node.
        """

        super().__init__(options, site, root_dir)
        self.out_dir = out_dir or root_dir

    def _get_or_create_page(
        self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
    ) -> ConfluencePageMetadata:
        """
        Extracts metadata from a Markdown file.
        """

        # parse file
        document = Scanner().read(absolute_path)
        if document.page_id is not None:
            page_id = document.page_id
            space_key = document.space_key or self.site.space_key or "HOME"
        else:
            if parent_id is None:
                raise PageError(
                    f"expected: parent page ID for Markdown file with no linked Confluence page: {absolute_path}"
                )

            hash = hashlib.md5(document.text.encode("utf-8"))
            digest = "".join(f"{c:x}" for c in hash.digest())
            LOGGER.info("Identifier %s assigned to page: %s", digest, absolute_path)
            page_id = digest
            space_key = self.site.space_key or "HOME"

        return ConfluencePageMetadata(
            page_id=page_id,
            space_key=space_key,
            title="",
            overwrite=True,
        )

    def _save_document(
        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
    ) -> None:
        """
        Saves a new version of a Confluence document.

        A derived class may invoke Confluence REST API to persist the new version.
        """

        content = document.xhtml()
        out_path = self.out_dir / path.relative_to(self.root_dir).with_suffix(".csf")
        os.makedirs(out_path.parent, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(content)


class LocalProcessorFactory(ProcessorFactory):
    out_dir: Optional[Path]

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        super().__init__(options, site)
        self.out_dir = out_dir

    def create(self, root_dir: Path) -> Processor:
        return LocalProcessor(
            self.options, self.site, out_dir=self.out_dir, root_dir=root_dir
        )


class LocalConverter(Converter):
    """
    The entry point for Markdown to Confluence conversion.
    """

    def __init__(
        self,
        options: ConfluenceDocumentOptions,
        site: ConfluenceSiteMetadata,
        out_dir: Optional[Path] = None,
    ) -> None:
        super().__init__(LocalProcessorFactory(options, site, out_dir))
```
md2conf/matcher.py
CHANGED
```diff
@@ -10,15 +10,15 @@ import os.path
 from dataclasses import dataclass
 from fnmatch import fnmatch
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Iterable, Optional, Union, overload


-@dataclass
+@dataclass(frozen=True)
 class Entry:
     """
     Represents a file or directory entry.

-    :param name: Name of the file-system entry.
+    :param name: Name of the file-system entry to match against the rule-set.
     :param is_dir: True if the entry is a directory.
     """

@@ -43,6 +43,15 @@ class MatcherOptions:
         self.extension = f".{self.extension}"


+def _entry_name_dir(entry: Union[Entry, os.DirEntry[str]]) -> tuple[str, bool]:
+    if isinstance(entry, Entry):
+        return entry.name, entry.is_dir
+    elif isinstance(entry, os.DirEntry):
+        return entry.name, entry.is_dir()
+    else:
+        raise NotImplementedError("type match not exhaustive")
+
+
 class Matcher:
     "Compares file and directory names against a list of exclude/include patterns."

@@ -58,20 +67,40 @@ class Matcher:
         else:
             self.rules = []

+        for rule in self.rules:
+            if "/" in rule or os.path.sep in rule:
+                raise ValueError(f"nested matching not supported: {rule}")
+
     def extension_matches(self, name: str) -> bool:
         "True if the file name has the expected extension."

         return self.options.extension is None or name.endswith(self.options.extension)

-
+    @overload
+    def is_excluded(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches any of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name matches at least one of the exclusion patterns.
+        """
+
+        ...
+
+    @overload
+    def is_excluded(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches any of the exclusion patterns.

-        :param
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name matches at least one of the exclusion patterns.
         """

+        ...
+
+    def is_excluded(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        name, is_dir = _entry_name_dir(entry)
+
         # skip hidden files and directories
         if name.startswith("."):
             return True
@@ -86,26 +115,38 @@ class Matcher:
         else:
             return False

-
+    @overload
+    def is_included(self, entry: Entry) -> bool:
+        """
+        True if the file or directory name matches none of the exclusion patterns.
+
+        :param entry: A data-class object.
+        :returns: True if the name doesn't match any of the exclusion patterns.
+        """
+        ...
+
+    @overload
+    def is_included(self, entry: os.DirEntry[str]) -> bool:
         """
         True if the file or directory name matches none of the exclusion patterns.

-        :param
-        :param is_dir: Whether the name identifies a directory.
+        :param entry: An object returned by `scandir`.
         :returns: True if the name doesn't match any of the exclusion patterns.
         """
+        ...

-
+    def is_included(self, entry: Union[Entry, os.DirEntry[str]]) -> bool:
+        return not self.is_excluded(entry)

-    def filter(self,
+    def filter(self, entries: Iterable[Entry]) -> list[Entry]:
         """
         Returns only those elements from the input that don't match any of the exclusion rules.

-        :param
+        :param entries: A list of names to filter.
         :returns: A filtered list of names that didn't match any of the exclusion rules.
         """

-        return [
+        return [entry for entry in entries if self.is_included(entry)]

     def scandir(self, path: Path) -> list[Entry]:
         """
```
md2conf/mermaid.py
CHANGED
```diff
@@ -79,10 +79,16 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") -> bytes:
     )
     stdout, stderr = proc.communicate(input=source.encode("utf-8"))
     if proc.returncode:
-        raise RuntimeError(
-            f"failed to convert Mermaid diagram; exit code: {proc.returncode}
-
-        )
+        messages = [
+            f"failed to convert Mermaid diagram; exit code: {proc.returncode}"
+        ]
+        console_output = stdout.decode("utf-8")
+        if console_output:
+            messages.append(f"output:\n{console_output}")
+        console_error = stderr.decode("utf-8")
+        if console_error:
+            messages.append(f"error:\n{console_error}")
+        raise RuntimeError("\n".join(messages))
     with open(filename, "rb") as image:
         return image.read()
```
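The rewritten error path no longer discards console output: the exit code, captured stdout, and captured stderr are folded into a single `RuntimeError` message. A hedged usage sketch, assuming the Mermaid CLI that `render_diagram` invokes as a subprocess is installed:

```python
from md2conf.mermaid import render_diagram

try:
    png = render_diagram("graph TD; A-->B;", output_format="png")
except RuntimeError as e:
    # As of 0.3.5, the message carries the exit code, followed by
    # "output:" and "error:" sections whenever the CLI printed anything.
    print(e)
else:
    with open("diagram.png", "wb") as f:
        f.write(png)
```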
md2conf/metadata.py
ADDED
```python
"""
Publish Markdown files to Confluence wiki.

Copyright 2022-2025, Levente Hunyadi

:see: https://github.com/hunyadi/md2conf
"""

from dataclasses import dataclass
from typing import Optional


@dataclass
class ConfluenceSiteMetadata:
    """
    Data associated with a Confluence wiki site.

    :param domain: Confluence organization domain (e.g. `levente-hunyadi.atlassian.net`).
    :param base_path: Base path for Confluence (default: `/wiki/`).
    :param space_key: Confluence space key for new pages (e.g. `~hunyadi` or `INST`).
    """

    domain: str
    base_path: str
    space_key: Optional[str]


@dataclass
class ConfluencePageMetadata:
    """
    Data associated with a Confluence page.

    :param page_id: Confluence page ID.
    :param space_key: Confluence space key.
    :param title: Document title.
    :param overwrite: True if operations are allowed to update document properties (e.g. title).
    """

    page_id: str
    space_key: str
    title: str
    overwrite: bool
```
md2conf/processor.py
CHANGED
```diff
@@ -6,101 +6,94 @@ Copyright 2022-2025, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """

-import hashlib
 import logging
 import os
+from abc import abstractmethod
 from pathlib import Path
 from typing import Optional

-from .converter import (
-    ConfluenceDocument,
-    ConfluenceDocumentOptions,
-    ConfluencePageMetadata,
-    ConfluenceQualifiedID,
-    ConfluenceSiteMetadata,
-    extract_qualified_id,
-)
+from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
 from .matcher import Matcher, MatcherOptions
+from .metadata import ConfluencePageMetadata, ConfluenceSiteMetadata
 from .properties import ArgumentError

 LOGGER = logging.getLogger(__name__)


 class Processor:
+    """
+    Processes a single Markdown page or a directory of Markdown pages.
+    """
+
     options: ConfluenceDocumentOptions
-
+    site: ConfluenceSiteMetadata
+    root_dir: Path
+
+    page_metadata: dict[Path, ConfluencePageMetadata]

     def __init__(
-        self,
+        self,
+        options: ConfluenceDocumentOptions,
+        site: ConfluenceSiteMetadata,
+        root_dir: Path,
     ) -> None:
         self.options = options
-        self.
+        self.site = site
+        self.root_dir = root_dir

-
-        "Processes a single Markdown file or a directory of Markdown files."
+        self.page_metadata = {}

-
-
-
-
-            self.process_page(path)
-        else:
-            raise ArgumentError(f"expected: valid file or directory path; got: {path}")
-
-    def process_directory(
-        self, local_dir: Path, root_dir: Optional[Path] = None
-    ) -> None:
-        "Recursively scans a directory hierarchy for Markdown files."
+    def process_directory(self, local_dir: Path) -> None:
+        """
+        Recursively scans a directory hierarchy for Markdown files, and processes each, resolving cross-references.
+        """

         local_dir = local_dir.resolve(True)
-        if root_dir is None:
-            root_dir = local_dir
-        else:
-            root_dir = root_dir.resolve(True)
-
-        LOGGER.info("Synchronizing directory: %s", local_dir)
+        LOGGER.info("Processing directory: %s", local_dir)

         # Step 1: build index of all page metadata
-
-
-        LOGGER.info("Indexed %d page(s)", len(page_metadata))
+        self._index_directory(local_dir, self.options.root_page_id)
+        LOGGER.info("Indexed %d page(s)", len(self.page_metadata))

         # Step 2: convert each page
-        for page_path in page_metadata.keys():
-            self._process_page(page_path
+        for page_path in self.page_metadata.keys():
+            self._process_page(page_path)

-    def process_page(self, path: Path
-        "
+    def process_page(self, path: Path) -> None:
+        """
+        Processes a single Markdown file.
+        """

-
-
-
-        else:
-            root_dir = root_dir.resolve(True)
-
-        self._process_page(path, root_dir, {})
-
-    def _process_page(
-        self,
-        path: Path,
-        root_dir: Path,
-        page_metadata: dict[Path, ConfluencePageMetadata],
-    ) -> None:
-        "Processes a single Markdown file."
+        LOGGER.info("Processing page: %s", path)
+        self._index_page(path, self.options.root_page_id)
+        self._process_page(path)

-
-
+    def _process_page(self, path: Path) -> None:
+        page_id, document = ConfluenceDocument.create(
+            path, self.options, self.root_dir, self.site, self.page_metadata
         )
-
-
-
+        self._save_document(page_id, document, path)
+
+    @abstractmethod
+    def _get_or_create_page(
+        self, absolute_path: Path, parent_id: Optional[ConfluencePageID]
+    ) -> ConfluencePageMetadata:
+        """
+        Creates a new Confluence page if no page is linked in the Markdown document.
+        """
+        ...
+
+    @abstractmethod
+    def _save_document(
+        self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
+    ) -> None: ...

     def _index_directory(
-        self,
-        local_dir: Path,
-        page_metadata: dict[Path, ConfluencePageMetadata],
+        self, local_dir: Path, parent_id: Optional[ConfluencePageID]
     ) -> None:
-        "
+        """
+        Indexes Markdown files in a directory hierarchy recursively.
+        """

         LOGGER.info("Indexing directory: %s", local_dir)

@@ -109,7 +102,7 @@ class Processor:
         files: list[Path] = []
         directories: list[Path] = []
         for entry in os.scandir(local_dir):
-            if matcher.is_excluded(entry
+            if matcher.is_excluded(entry):
                 continue

             if entry.is_file():
@@ -117,32 +110,107 @@ class Processor:
             elif entry.is_dir():
                 directories.append(Path(local_dir) / entry.name)

+        # make page act as parent node
+        parent_doc: Optional[Path] = None
+        if (Path(local_dir) / "index.md") in files:
+            parent_doc = Path(local_dir) / "index.md"
+        elif (Path(local_dir) / "README.md") in files:
+            parent_doc = Path(local_dir) / "README.md"
+        elif (Path(local_dir) / f"{local_dir.name}.md") in files:
+            parent_doc = Path(local_dir) / f"{local_dir.name}.md"
+
+        if parent_doc is None and self.options.keep_hierarchy:
+            parent_doc = Path(local_dir) / "index.md"
+
+            # create a blank page for directory entry
+            with open(parent_doc, "w"):
+                pass
+
+        if parent_doc is not None:
+            if parent_doc in files:
+                files.remove(parent_doc)
+
+            # use latest parent as parent for index page
+            metadata = self._get_or_create_page(parent_doc, parent_id)
+            LOGGER.debug("Indexed parent %s with metadata: %s", parent_doc, metadata)
+            self.page_metadata[parent_doc] = metadata
+
+            # assign new index page as new parent
+            parent_id = ConfluencePageID(metadata.page_id)
+
         for doc in files:
-
-            LOGGER.debug("Indexed %s with metadata: %s", doc, metadata)
-            page_metadata[doc] = metadata
+            self._index_page(doc, parent_id)

         for directory in directories:
-            self._index_directory(directory,
-
-    def
-        "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            self._index_directory(directory, parent_id)
+
+    def _index_page(self, path: Path, parent_id: Optional[ConfluencePageID]) -> None:
+        """
+        Indexes a single Markdown file.
+        """
+
+        metadata = self._get_or_create_page(path, parent_id)
+        LOGGER.debug("Indexed %s with metadata: %s", path, metadata)
+        self.page_metadata[path] = metadata
+
+
+class ProcessorFactory:
+    options: ConfluenceDocumentOptions
+    site: ConfluenceSiteMetadata
+
+    def __init__(
+        self, options: ConfluenceDocumentOptions, site: ConfluenceSiteMetadata
+    ) -> None:
+        self.options = options
+        self.site = site
+
+    @abstractmethod
+    def create(self, root_dir: Path) -> Processor: ...
+
+
+class Converter:
+    factory: ProcessorFactory
+
+    def __init__(self, factory: ProcessorFactory) -> None:
+        self.factory = factory
+
+    def process(self, path: Path) -> None:
+        """
+        Processes a single Markdown file or a directory of Markdown files.
+        """
+
+        path = path.resolve(True)
+        if path.is_dir():
+            self.process_directory(path)
+        elif path.is_file():
+            self.process_page(path)
+        else:
+            raise ArgumentError(f"expected: valid file or directory path; got: {path}")
+
+    def process_directory(
+        self, local_dir: Path, root_dir: Optional[Path] = None
+    ) -> None:
+        """
+        Recursively scans a directory hierarchy for Markdown files, and processes each, resolving cross-references.
+        """
+
+        local_dir = local_dir.resolve(True)
+        if root_dir is None:
+            root_dir = local_dir
+        else:
+            root_dir = root_dir.resolve(True)
+
+        self.factory.create(root_dir).process_directory(local_dir)
+
+    def process_page(self, path: Path, root_dir: Optional[Path] = None) -> None:
+        """
+        Processes a single Markdown file.
+        """
+
+        path = path.resolve(True)
+        if root_dir is None:
+            root_dir = path.parent
+        else:
+            root_dir = root_dir.resolve(True)
+
+        self.factory.create(root_dir).process_page(path)
```