docspan 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docspan/__init__.py +3 -0
- docspan/__main__.py +0 -0
- docspan/backends/__init__.py +19 -0
- docspan/backends/base.py +85 -0
- docspan/backends/confluence/__init__.py +0 -0
- docspan/backends/confluence/adf/__init__.py +14 -0
- docspan/backends/confluence/adf/comparator.py +427 -0
- docspan/backends/confluence/adf/converter.py +119 -0
- docspan/backends/confluence/adf/converters.py +1449 -0
- docspan/backends/confluence/adf/interfaces.py +191 -0
- docspan/backends/confluence/adf/nodes.py +2085 -0
- docspan/backends/confluence/adf/parser.py +400 -0
- docspan/backends/confluence/adf/validators.py +161 -0
- docspan/backends/confluence/adf/visitors.py +495 -0
- docspan/backends/confluence/backend.py +227 -0
- docspan/backends/confluence/client.py +44 -0
- docspan/backends/confluence/config/__init__.py +21 -0
- docspan/backends/confluence/config/loader.py +107 -0
- docspan/backends/confluence/config/models.py +167 -0
- docspan/backends/confluence/config/validation.py +297 -0
- docspan/backends/confluence/markdown/__init__.py +22 -0
- docspan/backends/confluence/markdown/ast.py +819 -0
- docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
- docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
- docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
- docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
- docspan/backends/confluence/markdown/inline_parser.py +495 -0
- docspan/backends/confluence/markdown/parser.py +1006 -0
- docspan/backends/confluence/models/__init__.py +18 -0
- docspan/backends/confluence/models/markdown_file.py +402 -0
- docspan/backends/confluence/models/page.py +212 -0
- docspan/backends/confluence/models/path_utils.py +34 -0
- docspan/backends/confluence/models/results.py +28 -0
- docspan/backends/confluence/models/sync_status.py +382 -0
- docspan/backends/confluence/services/__init__.py +0 -0
- docspan/backends/confluence/services/confluence/__init__.py +40 -0
- docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
- docspan/backends/confluence/services/confluence/base_client.py +420 -0
- docspan/backends/confluence/services/confluence/client.py +376 -0
- docspan/backends/confluence/services/confluence/comment_client.py +682 -0
- docspan/backends/confluence/services/confluence/crawler.py +587 -0
- docspan/backends/confluence/services/confluence/label_client.py +130 -0
- docspan/backends/confluence/services/confluence/page_client.py +1288 -0
- docspan/backends/confluence/services/confluence/space_client.py +179 -0
- docspan/backends/confluence/services/confluence/url_parser.py +106 -0
- docspan/backends/google_docs/__init__.py +0 -0
- docspan/backends/google_docs/auth.py +143 -0
- docspan/backends/google_docs/backend.py +140 -0
- docspan/backends/google_docs/client.py +665 -0
- docspan/backends/google_docs/converter.py +471 -0
- docspan/backends/google_docs/docs_request_builder.py +232 -0
- docspan/backends/google_docs/docs_structure_parser.py +120 -0
- docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
- docspan/cli/__init__.py +0 -0
- docspan/cli/main.py +408 -0
- docspan/config.py +62 -0
- docspan/core/__init__.py +49 -0
- docspan/core/merge.py +30 -0
- docspan/core/orchestrator.py +332 -0
- docspan/core/paths.py +8 -0
- docspan/core/state.py +53 -0
- docspan-0.1.0.dist-info/METADATA +273 -0
- docspan-0.1.0.dist-info/RECORD +65 -0
- docspan-0.1.0.dist-info/WHEEL +4 -0
- docspan-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""Confluence backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
import re
|
|
9
|
+
from typing import TYPE_CHECKING, Optional
|
|
10
|
+
|
|
11
|
+
import markdownify as md_lib
|
|
12
|
+
|
|
13
|
+
from docspan.backends.base import Backend, PullResult, PushResult
|
|
14
|
+
from docspan.backends.confluence.adf.converter import AdfConverter
|
|
15
|
+
from docspan.backends.confluence.config.models import ConfluenceConfig as InternalConfluenceConfig
|
|
16
|
+
from docspan.backends.confluence.markdown.parser import MarkdownParser
|
|
17
|
+
from docspan.backends.confluence.models.page import ConfluencePage
|
|
18
|
+
from docspan.backends.confluence.services.confluence.client import ConfluenceClient
|
|
19
|
+
from docspan.backends.confluence.services.confluence.comment_client import ConfluenceCommentClient
|
|
20
|
+
from docspan.config import ConfluenceConfig
|
|
21
|
+
from docspan.core.paths import COMMENTS_SUFFIX
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from docspan.config import MarkgateConfig
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ConfluenceBackend(Backend):
|
|
30
|
+
name = "confluence"
|
|
31
|
+
|
|
32
|
+
def __init__(self, config: ConfluenceConfig) -> None:
|
|
33
|
+
self.config = config
|
|
34
|
+
self._client: Optional[ConfluenceClient] = None
|
|
35
|
+
self._comment_client: Optional[ConfluenceCommentClient] = None
|
|
36
|
+
self._internal_cfg: Optional[InternalConfluenceConfig] = None
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def from_config(cls, markgate_config: "MarkgateConfig") -> "ConfluenceBackend":
|
|
40
|
+
return cls(markgate_config.backends.confluence or ConfluenceConfig())
|
|
41
|
+
|
|
42
|
+
def _ensure_client(self) -> None:
|
|
43
|
+
if self._client is not None:
|
|
44
|
+
return
|
|
45
|
+
base_url = self.config.base_url or os.getenv("CONFLUENCE_BASE_URL")
|
|
46
|
+
username = self.config.username or os.getenv("ATLASSIAN_USER_NAME")
|
|
47
|
+
api_token = self.config.api_token or os.getenv("CONFLUENCE_API_TOKEN")
|
|
48
|
+
if not all([base_url, username, api_token]):
|
|
49
|
+
raise RuntimeError(
|
|
50
|
+
"Confluence credentials incomplete. Run: docspan auth setup confluence\n"
|
|
51
|
+
"Or set CONFLUENCE_BASE_URL, ATLASSIAN_USER_NAME, CONFLUENCE_API_TOKEN."
|
|
52
|
+
)
|
|
53
|
+
self._internal_cfg = InternalConfluenceConfig(
|
|
54
|
+
base_url=base_url,
|
|
55
|
+
username=username,
|
|
56
|
+
api_token=api_token,
|
|
57
|
+
)
|
|
58
|
+
self._client = ConfluenceClient(self._internal_cfg)
|
|
59
|
+
|
|
60
|
+
def _ensure_comment_client(self) -> None:
|
|
61
|
+
if self._comment_client is not None:
|
|
62
|
+
return
|
|
63
|
+
self._ensure_client()
|
|
64
|
+
assert self._internal_cfg is not None
|
|
65
|
+
self._comment_client = ConfluenceCommentClient(self._internal_cfg)
|
|
66
|
+
|
|
67
|
+
# ── Comment sidecar ────────────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def _format_comment(comment: dict) -> list[str]:
|
|
71
|
+
comment_id = comment.get("id", "unknown")
|
|
72
|
+
author = (
|
|
73
|
+
comment.get("createdBy", {}).get("displayName")
|
|
74
|
+
or comment.get("version", {}).get("by", {}).get("displayName", "Unknown")
|
|
75
|
+
)
|
|
76
|
+
date = (
|
|
77
|
+
comment.get("version", {}).get("createdAt", "")
|
|
78
|
+
or comment.get("version", {}).get("friendlyWhen", "")
|
|
79
|
+
)
|
|
80
|
+
lines: list[str] = [f"### [{comment_id}] {author} — {date}", ""]
|
|
81
|
+
selection = (
|
|
82
|
+
comment.get("properties", {}).get("inlineOriginalSelection", "")
|
|
83
|
+
or comment.get("inlineCommentProperties", {}).get("textSelection", "")
|
|
84
|
+
)
|
|
85
|
+
if selection:
|
|
86
|
+
lines += [f'> Selection: "{selection}"', ""]
|
|
87
|
+
body_html = (
|
|
88
|
+
comment.get("body", {}).get("storage", {}).get("value", "")
|
|
89
|
+
or comment.get("body", {}).get("view", {}).get("value", "")
|
|
90
|
+
)
|
|
91
|
+
body_text = re.sub(r"<[^<]+?>", "", body_html).strip()
|
|
92
|
+
if body_text:
|
|
93
|
+
lines.append(body_text)
|
|
94
|
+
lines += ["", "---", ""]
|
|
95
|
+
return lines
|
|
96
|
+
|
|
97
|
+
def _write_comment_sidecar(
    self,
    local_path: str,
    page_title: str,
    inline_comments: list,
    footer_comments: list,
) -> None:
    """Write the page's comments to a markdown sidecar next to the document.

    The file is created at ``local_path + COMMENTS_SUFFIX`` and contains a
    title line followed by an "Inline comments" and/or "Footer comments"
    section; a section is omitted when its list is empty.

    Args:
        local_path: Path of the pulled markdown document.
        page_title: Title used in the sidecar heading.
        inline_comments: Comment payloads for the inline section.
        footer_comments: Comment payloads for the footer section.
    """
    lines = [f"# Comments: {page_title}", ""]
    sections = (
        ("## Inline comments", inline_comments),
        ("## Footer comments", footer_comments),
    )
    for heading, comments in sections:
        if not comments:
            continue
        lines += [heading, ""]
        for comment in comments:
            lines += self._format_comment(comment)

    sidecar_path = pathlib.Path(str(local_path) + COMMENTS_SUFFIX)
    # Comment headings contain an em dash and bodies are arbitrary user text,
    # so write UTF-8 explicitly instead of the locale-dependent default.
    sidecar_path.write_text("\n".join(lines), encoding="utf-8")
|
|
115
|
+
|
|
116
|
+
# ── Backend interface ──────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
def push(self, local_path: str, doc_id: str, **kwargs: object) -> PushResult:
    """Convert local markdown to ADF and update the Confluence page.

    Args:
        local_path: Markdown file to publish; read as UTF-8.
        doc_id: ID of the Confluence page to update.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``PushResult`` with ``status="ok"`` and the page URL on success, or
        ``status="error"`` carrying the exception text when reading,
        converting or uploading fails.

    Raises:
        RuntimeError: From ``_ensure_client`` when credentials are
            incomplete (client setup runs outside the ``try`` block).
    """
    self._ensure_client()
    assert self._client is not None
    try:
        # Explicit encoding: the platform default is locale-dependent and can
        # fail on or mangle non-ASCII markdown.
        content = pathlib.Path(local_path).read_text(encoding="utf-8")
        ast = MarkdownParser().parse(content)
        adf_doc = AdfConverter().convert(ast)

        page = self._client.get_page(doc_id)
        title = page.get("title", "Untitled")
        version = page.get("version", {}).get("number", 1)
        # Prefer an explicit parentId; otherwise use the last ancestor entry,
        # and fall back to "" when neither is available.
        parent_id = page.get("parentId") or (page.get("ancestors") or [{}])[-1].get("id", "")

        confluence_page = ConfluencePage(
            id=doc_id,
            title=title,
            content=adf_doc,
            parent_id=parent_id,
            version=version,
        )
        self._client.update_page(confluence_page)

        # Resolve the base URL the same way _ensure_client does, so a setup
        # that relies on CONFLUENCE_BASE_URL still gets a usable link.
        base_url = (self.config.base_url or os.getenv("CONFLUENCE_BASE_URL") or "").rstrip("/")
        return PushResult(status="ok", doc_id=doc_id, url=f"{base_url}/pages/{doc_id}")
    except Exception as exc:
        return PushResult(status="error", doc_id=doc_id, message=str(exc))
|
|
144
|
+
|
|
145
|
+
def pull(self, doc_id: str, local_path: str, **kwargs: object) -> PullResult:
    """Fetch Confluence page storage content and convert to markdown.

    The page body is converted with markdownify and written to
    ``local_path`` as UTF-8 (parent directories are created). Comments are
    fetched on a best-effort basis and, when any exist, written to a sidecar
    file; comment failures are logged and never fail the pull.

    Args:
        doc_id: ID of the Confluence page to fetch.
        local_path: Destination path of the markdown file.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``PullResult`` with ``status="ok"`` on success, or ``status="error"``
        carrying the exception text when fetching or writing fails.

    Raises:
        RuntimeError: From ``_ensure_client`` when credentials are
            incomplete (client setup runs outside the ``try`` block).
    """
    self._ensure_client()
    assert self._client is not None
    try:
        page = self._client.get_page(doc_id)
        storage_html = page.get("body", {}).get("storage", {}).get("value", "")
        markdown = md_lib.markdownify(storage_html, heading_style="ATX", strip=["script", "style"])
        target = pathlib.Path(local_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the platform default is locale-dependent.
        target.write_text(markdown, encoding="utf-8")

        page_title = page.get("title", "Untitled")
        inline_comments: list = []
        footer_comments: list = []

        try:
            self._ensure_comment_client()
        except Exception:
            logger.warning("Could not initialise comment client for %s; skipping comments", doc_id, exc_info=True)

        # Only query comments when a client actually exists; calling through
        # None would just add misleading AttributeError warnings to the one
        # already logged above.
        comment_client = self._comment_client
        if comment_client is not None:
            try:
                inline_comments = comment_client.get_page_inline_comments(doc_id)
            except Exception as exc:
                # Fall back to the other comments endpoint before giving up.
                try:
                    v1_result = comment_client.get_comments(doc_id)
                    inline_comments = v1_result.get("results", [])
                except Exception as fallback_exc:
                    logger.warning(
                        "Could not fetch inline comments for %s (v2: %s, v1 fallback: %s)",
                        doc_id, exc, fallback_exc,
                    )

            try:
                footer_comments = comment_client.get_page_footer_comments(doc_id)
            except Exception as exc:
                logger.warning("Could not fetch footer comments for %s: %s", doc_id, exc)

        if inline_comments or footer_comments:
            self._write_comment_sidecar(local_path, page_title, inline_comments, footer_comments)

        return PullResult(status="ok", doc_id=doc_id, local_path=local_path)
    except Exception as exc:
        return PullResult(status="error", doc_id=doc_id, local_path=local_path, message=str(exc))
|
|
188
|
+
|
|
189
|
+
def get_remote_version(self, doc_id: str) -> str:
|
|
190
|
+
"""Return the current Confluence page version number as a string."""
|
|
191
|
+
self._ensure_client()
|
|
192
|
+
assert self._client is not None
|
|
193
|
+
page = self._client.get_page(doc_id)
|
|
194
|
+
return str(page["version"]["number"])
|
|
195
|
+
|
|
196
|
+
def auth_setup(self) -> None:
|
|
197
|
+
"""Interactive Confluence auth setup — prompts for credentials and prints YAML snippet."""
|
|
198
|
+
print("\nConfluence auth setup")
|
|
199
|
+
print("=" * 40)
|
|
200
|
+
base_url = input("Confluence base URL (e.g. https://yourorg.atlassian.net): ").strip()
|
|
201
|
+
username = input("Atlassian username (email): ").strip()
|
|
202
|
+
print(
|
|
203
|
+
f"\nAdd to markgate.yaml:\n\n"
|
|
204
|
+
f"backends:\n"
|
|
205
|
+
f" confluence:\n"
|
|
206
|
+
f" base_url: {base_url}\n"
|
|
207
|
+
f" username: {username}\n"
|
|
208
|
+
f" api_token: <your-token> # from id.atlassian.com/manage-profile/security/api-tokens\n"
|
|
209
|
+
)
|
|
210
|
+
print("Done. Test with: docspan status")
|
|
211
|
+
|
|
212
|
+
def validate_config(self) -> None:
|
|
213
|
+
base_url = self.config.base_url or os.getenv("CONFLUENCE_BASE_URL")
|
|
214
|
+
username = self.config.username or os.getenv("ATLASSIAN_USER_NAME")
|
|
215
|
+
api_token = self.config.api_token or os.getenv("CONFLUENCE_API_TOKEN")
|
|
216
|
+
missing = []
|
|
217
|
+
if not base_url:
|
|
218
|
+
missing.append("base_url / CONFLUENCE_BASE_URL")
|
|
219
|
+
if not username:
|
|
220
|
+
missing.append("username / ATLASSIAN_USER_NAME")
|
|
221
|
+
if not api_token:
|
|
222
|
+
missing.append("api_token / CONFLUENCE_API_TOKEN")
|
|
223
|
+
if missing:
|
|
224
|
+
raise ValueError(
|
|
225
|
+
f"Missing Confluence config: {', '.join(missing)}. "
|
|
226
|
+
"Run: docspan auth setup confluence"
|
|
227
|
+
)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Confluence REST API client — stub, to be ported from markdown-confluence."""
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
from requests.auth import HTTPBasicAuth
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ConfluenceClient:
    """Minimal Confluence REST client (stub).

    Sends requests to ``<base_url>/wiki/rest/api/content/...`` with HTTP
    basic auth over one shared ``requests.Session``. The markdown/storage
    conversions are still placeholders that pass content through unchanged.
    """

    def __init__(self, base_url: str, username: str, api_token: str, timeout: float = 30.0):
        """Set up the authenticated session.

        Args:
            base_url: Root URL of the Confluence site; a trailing slash is removed.
            username: Account name used for basic auth.
            api_token: API token used as the basic-auth password.
            timeout: Value passed as ``timeout`` to every HTTP call, so a
                stalled server cannot block the caller indefinitely.
        """
        self.base_url = base_url.rstrip("/")
        self.auth = HTTPBasicAuth(username, api_token)
        self.timeout = timeout
        self.session = requests.Session()
        self.session.auth = self.auth
        self.session.headers.update({"Content-Type": "application/json"})

    def get_page(self, page_id: str) -> dict:
        """Fetch a page with its storage body and version expanded.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.base_url}/wiki/rest/api/content/{page_id}?expand=body.storage,version"
        resp = self.session.get(url, timeout=self.timeout)
        resp.raise_for_status()
        return resp.json()

    def get_page_as_markdown(self, page_id: str) -> str:
        """Fetch page storage format and convert to markdown.

        Placeholder: currently returns the storage HTML unchanged.
        """
        page = self.get_page(page_id)
        storage_html = page["body"]["storage"]["value"]
        # TODO: port ADF/storage → markdown conversion from markdown-confluence
        return storage_html  # placeholder

    def update_page(self, page_id: str, markdown_content: str) -> dict:
        """Convert markdown to storage format and update the page.

        Placeholder: the content is currently sent as-is as the storage body.
        The page is fetched first to reuse its title and to send the current
        version number plus one.

        Raises:
            requests.HTTPError: If either request answers with an error status.
        """
        page = self.get_page(page_id)
        current_version = page["version"]["number"]
        title = page["title"]
        # TODO: port markdown → ADF conversion from markdown-confluence
        storage_value = markdown_content  # placeholder
        payload = {
            "version": {"number": current_version + 1},
            "title": title,
            "type": "page",
            "body": {"storage": {"value": storage_value, "representation": "storage"}},
        }
        url = f"{self.base_url}/wiki/rest/api/content/{page_id}"
        resp = self.session.put(url, json=payload, timeout=self.timeout)
        resp.raise_for_status()
        return resp.json()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration module.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for loading and managing configuration
|
|
5
|
+
for the markdown-confluence package.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from docspan.backends.confluence.config.loader import load_config, load_config_from_dict
|
|
9
|
+
from docspan.backends.confluence.config.models import (
|
|
10
|
+
ConfluenceConfig,
|
|
11
|
+
MarkdownConfluenceConfig,
|
|
12
|
+
PublishConfig,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Public API of the config package: the data models, then the loader entry points.
__all__ = [
    "ConfluenceConfig", "PublishConfig", "MarkdownConfluenceConfig",
    "load_config", "load_config_from_dict",
]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration loading utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, Optional, Union
|
|
10
|
+
|
|
11
|
+
from docspan.backends.confluence.config.models import MarkdownConfluenceConfig
|
|
12
|
+
from docspan.backends.confluence.config.validation import validate_config_dict
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def load_config(
    path: Union[str, Path],
    allow_env_only: bool = True,
    folder_to_publish: Optional[str] = None,
    require_parent_id: bool = False,
) -> MarkdownConfluenceConfig:
    """
    Build the configuration from a JSON file, falling back to the environment.

    Args:
        path: Location of the JSON configuration file
        allow_env_only: When the file is missing, continue with an empty
            configuration (values then come from environment variables)
            instead of failing
        folder_to_publish: If given, replaces ``publish.folder_to_publish``
        require_parent_id: Whether parent_id must be present (needed for
            publishing, not for crawling)

    Returns:
        Configuration object

    Raises:
        FileNotFoundError: If the file doesn't exist and allow_env_only is False
        json.JSONDecodeError: If the file isn't valid JSON
        ValueError: If the configuration is invalid
    """
    config_path = Path(path)

    if not config_path.exists():
        if not allow_env_only:
            raise FileNotFoundError(f"Configuration file not found: {path}")
        # No file: start from an empty dict and let environment variables
        # supply the required values further down the line.
        config_data = {}
    else:
        with config_path.open("r", encoding="utf-8") as handle:
            config_data = json.load(handle)

    if folder_to_publish:
        # Create the "publish" section on demand, then apply the override.
        config_data.setdefault("publish", {})["folder_to_publish"] = folder_to_publish

    return load_config_from_dict(config_data, require_parent_id=require_parent_id)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_config_from_dict(config_data: Dict[str, Any], require_parent_id: bool = False) -> MarkdownConfluenceConfig:
    """
    Load configuration from a dictionary.

    The dictionary is first run through ``validate_config_dict`` with
    auto-correction enabled; its warnings are logged and its errors abort
    loading. The corrected data is then turned into a config object whose
    Confluence section is checked for required fields.

    Args:
        config_data: Dictionary with configuration values
        require_parent_id: Whether to require parent_id in configuration (needed for publishing, not for crawling)

    Returns:
        Configuration object

    Raises:
        ValueError: If the configuration is invalid
    """
    # Validate configuration structure and detect typos
    corrected_config, validation_errors, validation_warnings = validate_config_dict(
        config_data,
        auto_correct=True,  # Auto-correct known typos like camelCase -> snake_case
    )

    # Show warnings for auto-corrections. Lazy %-style arguments keep the
    # message template constant and defer formatting to the logging framework.
    for warning in validation_warnings:
        logger.warning("Configuration: %s", warning)

    # Raise errors if validation failed
    if validation_errors:
        error_msg = "Invalid configuration:\n" + "\n".join(f" - {err}" for err in validation_errors)
        raise ValueError(error_msg)

    # Create config object with corrected data
    config = MarkdownConfluenceConfig.from_dict(corrected_config)

    # Validate required fields
    field_errors = config.confluence.validate(require_parent_id=require_parent_id)
    if field_errors:
        raise ValueError(f"Invalid configuration: {', '.join(field_errors)}")

    return config
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_api_token_from_env() -> Optional[str]:
    """
    Read the Atlassian API token from ``ATLASSIAN_API_TOKEN``.

    Returns:
        The token, or None when the variable is not set
    """
    token = os.getenv("ATLASSIAN_API_TOKEN")
    return token
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration data models for markdown-confluence.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ConfluenceConfig:
    """
    Connection settings for a Confluence instance.

    Attributes:
        base_url: Base URL for Confluence instance
        username: Atlassian username
        parent_id: ID of the parent page in Confluence (optional, only required for publishing)
        api_token: Atlassian API token (optional if ATLASSIAN_API_TOKEN is set)
        space_key: Confluence space key (optional)
    """

    base_url: str
    username: str
    parent_id: Optional[str] = None
    api_token: Optional[str] = None
    space_key: Optional[str] = None

    def __post_init__(self) -> None:
        """Fall back to ``ATLASSIAN_API_TOKEN`` when no token was supplied."""
        self.api_token = self.api_token or os.environ.get("ATLASSIAN_API_TOKEN")

    def validate(self, require_parent_id: bool = False) -> List[str]:
        """
        Check that the required settings are present.

        Args:
            require_parent_id: Whether parent_id must be set (needed for publishing, not for crawling)

        Returns:
            One message per missing setting; an empty list means valid
        """
        # (failed?, message) pairs, in the order the messages are reported.
        checks = (
            (not self.base_url, "base_url is required"),
            (
                require_parent_id and not self.parent_id,
                "parent_id is required for publishing operations",
            ),
            (not self.username, "username is required"),
            (
                not self.api_token,
                "api_token is required (either in config or ATLASSIAN_API_TOKEN environment variable)",
            ),
        )
        return [message for failed, message in checks if failed]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class PublishConfig:
    """Settings that control how documents are published."""

    # Folder to publish (relative to working directory)
    folder_to_publish: str = "."
    # Whether to use file path as page title
    use_file_path_as_title: bool = False
    # Whether to prepend file path to page title
    prepend_file_path_to_title: bool = False
    # Whether to extract frontmatter from document start
    frontmatter_from_document_start: bool = True
    # Whether to skip adding metadata to published pages
    skip_metadata: bool = False
    # Whether to resolve relative links between documents
    resolve_relative_links: bool = True
    # Whether to respect dependencies between documents when publishing
    respect_link_dependencies: bool = True
    # Whether to automatically fix page hierarchy based on directory structure
    auto_fix_hierarchy: bool = True
    # NOTE(review): not described in the original docstring; presumably whether
    # archived pages are handled automatically — confirm against its users.
    auto_handle_archived: bool = True
    # Whether to automatically migrate legacy editor pages to new editor before publishing
    auto_migrate_legacy: bool = True
    # Threshold for considering pages as duplicates (0.0-1.0)
    duplicate_similarity_threshold: float = 0.8
    # Whether to render mermaid diagrams as images
    render_mermaid_diagrams: bool = True
    # Whether to process assets (images, diagrams) for embedding
    process_assets: bool = True
    # File patterns to ignore (supports glob patterns like **/TODO.md)
    ignore_patterns: List[str] = field(default_factory=list)
    # Whether to archive/delete ignored files from Confluence if they have page IDs
    archive_ignored: bool = True
    # Whether to check for remote changes before publishing
    enable_sync: bool = True
    # Whether to automatically resolve conflicts
    auto_resolve_conflicts: bool = False
    # When auto-resolving conflicts, prefer remote changes (otherwise local wins)
    prefer_remote_on_conflict: bool = False
    # Visibility for pages that don't specify one in frontmatter ("private" or "public")
    default_visibility: Optional[str] = None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class MarkdownConfluenceConfig:
    """
    Complete configuration for markdown-confluence.

    Attributes:
        confluence: Confluence connection configuration
        publish: Publishing behavior configuration
    """

    confluence: ConfluenceConfig
    publish: PublishConfig = field(default_factory=PublishConfig)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MarkdownConfluenceConfig":
        """
        Create configuration from dictionary.

        Every setting is accepted under its snake_case name or its camelCase
        alias. Connection settings (except the API token) additionally fall
        back to environment variables and finally to an empty string.

        Args:
            data: Dictionary with configuration values (supports both nested and flat structures)

        Returns:
            Configuration object
        """
        # Handle both nested structure ({"confluence": {...}, "publish": {...}})
        # and flat structure ({"base_url": ..., "ignore_patterns": ...})
        confluence_dict = data.get("confluence", data)
        publish_dict = data.get("publish", data)

        confluence_data = {
            "base_url": confluence_dict.get("base_url") or confluence_dict.get("confluenceBaseUrl") or os.environ.get("CONFLUENCE_BASE_URL") or os.environ.get("CONFLUENCE_URL") or "",
            "parent_id": confluence_dict.get("parent_id") or confluence_dict.get("confluenceParentId") or os.environ.get("CONFLUENCE_PARENT_ID") or "",
            "username": confluence_dict.get("username") or confluence_dict.get("atlassianUserName") or os.environ.get("ATLASSIAN_USER_NAME") or os.environ.get("CONFLUENCE_USERNAME") or "",
            # No env lookup here: ConfluenceConfig.__post_init__ fills a missing token.
            "api_token": confluence_dict.get("api_token") or confluence_dict.get("atlassianApiToken"),
            "space_key": confluence_dict.get("space_key") or confluence_dict.get("confluenceSpaceKey") or os.environ.get("CONFLUENCE_SPACE_KEY") or "",
        }

        # (snake_case key, camelCase alias, default). Built per call so the
        # mutable [] default is never shared between configurations.
        publish_fields = (
            ("folder_to_publish", "folderToPublish", "."),
            ("use_file_path_as_title", "useFilePathAsTitle", False),
            ("prepend_file_path_to_title", "prependFilePathToTitle", False),
            ("frontmatter_from_document_start", "frontmatterFromDocumentStart", True),
            ("skip_metadata", "skipMetadata", False),
            ("resolve_relative_links", "resolveRelativeLinks", True),
            ("respect_link_dependencies", "respectLinkDependencies", True),
            ("auto_fix_hierarchy", "autoFixHierarchy", True),
            ("auto_handle_archived", "autoHandleArchived", True),
            ("auto_migrate_legacy", "autoMigrateLegacy", True),
            ("duplicate_similarity_threshold", "duplicateSimilarityThreshold", 0.8),
            ("render_mermaid_diagrams", "renderMermaidDiagrams", True),
            ("process_assets", "processAssets", True),
            ("ignore_patterns", "ignorePatterns", []),
            ("archive_ignored", "archiveIgnored", True),
            # These three were previously never read, so values supplied in
            # the configuration were silently replaced by the defaults.
            ("enable_sync", "enableSync", True),
            ("auto_resolve_conflicts", "autoResolveConflicts", False),
            ("prefer_remote_on_conflict", "preferRemoteOnConflict", False),
            ("default_visibility", "defaultVisibility", None),
        )
        publish_data = {
            snake: publish_dict.get(snake, publish_dict.get(camel, default))
            for snake, camel, default in publish_fields
        }

        return cls(
            confluence=ConfluenceConfig(**confluence_data), publish=PublishConfig(**publish_data)
        )
|