docspan 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docspan/__init__.py +3 -0
- docspan/__main__.py +0 -0
- docspan/backends/__init__.py +19 -0
- docspan/backends/base.py +85 -0
- docspan/backends/confluence/__init__.py +0 -0
- docspan/backends/confluence/adf/__init__.py +14 -0
- docspan/backends/confluence/adf/comparator.py +427 -0
- docspan/backends/confluence/adf/converter.py +119 -0
- docspan/backends/confluence/adf/converters.py +1449 -0
- docspan/backends/confluence/adf/interfaces.py +191 -0
- docspan/backends/confluence/adf/nodes.py +2085 -0
- docspan/backends/confluence/adf/parser.py +400 -0
- docspan/backends/confluence/adf/validators.py +161 -0
- docspan/backends/confluence/adf/visitors.py +495 -0
- docspan/backends/confluence/backend.py +227 -0
- docspan/backends/confluence/client.py +44 -0
- docspan/backends/confluence/config/__init__.py +21 -0
- docspan/backends/confluence/config/loader.py +107 -0
- docspan/backends/confluence/config/models.py +167 -0
- docspan/backends/confluence/config/validation.py +297 -0
- docspan/backends/confluence/markdown/__init__.py +22 -0
- docspan/backends/confluence/markdown/ast.py +819 -0
- docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
- docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
- docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
- docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
- docspan/backends/confluence/markdown/inline_parser.py +495 -0
- docspan/backends/confluence/markdown/parser.py +1006 -0
- docspan/backends/confluence/models/__init__.py +18 -0
- docspan/backends/confluence/models/markdown_file.py +402 -0
- docspan/backends/confluence/models/page.py +212 -0
- docspan/backends/confluence/models/path_utils.py +34 -0
- docspan/backends/confluence/models/results.py +28 -0
- docspan/backends/confluence/models/sync_status.py +382 -0
- docspan/backends/confluence/services/__init__.py +0 -0
- docspan/backends/confluence/services/confluence/__init__.py +40 -0
- docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
- docspan/backends/confluence/services/confluence/base_client.py +420 -0
- docspan/backends/confluence/services/confluence/client.py +376 -0
- docspan/backends/confluence/services/confluence/comment_client.py +682 -0
- docspan/backends/confluence/services/confluence/crawler.py +587 -0
- docspan/backends/confluence/services/confluence/label_client.py +130 -0
- docspan/backends/confluence/services/confluence/page_client.py +1288 -0
- docspan/backends/confluence/services/confluence/space_client.py +179 -0
- docspan/backends/confluence/services/confluence/url_parser.py +106 -0
- docspan/backends/google_docs/__init__.py +0 -0
- docspan/backends/google_docs/auth.py +143 -0
- docspan/backends/google_docs/backend.py +140 -0
- docspan/backends/google_docs/client.py +665 -0
- docspan/backends/google_docs/converter.py +471 -0
- docspan/backends/google_docs/docs_request_builder.py +232 -0
- docspan/backends/google_docs/docs_structure_parser.py +120 -0
- docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
- docspan/cli/__init__.py +0 -0
- docspan/cli/main.py +408 -0
- docspan/config.py +62 -0
- docspan/core/__init__.py +49 -0
- docspan/core/merge.py +30 -0
- docspan/core/orchestrator.py +332 -0
- docspan/core/paths.py +8 -0
- docspan/core/state.py +53 -0
- docspan-0.1.0.dist-info/METADATA +273 -0
- docspan-0.1.0.dist-info/RECORD +65 -0
- docspan-0.1.0.dist-info/WHEEL +4 -0
- docspan-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration validation with typo detection.
|
|
3
|
+
|
|
4
|
+
Validates configuration dictionaries and suggests corrections for typos using Levenshtein distance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Compute the Levenshtein (edit) distance between two strings.

    The distance is the smallest number of single-character insertions,
    deletions and substitutions that turns one string into the other. Only
    two rows of the dynamic-programming table are held at any time.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Edit distance between strings
    """
    # Let the longer string drive the outer loop so every row is as short as possible.
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    if not shorter:
        # Turning a string into "" costs one deletion per character.
        return len(longer)

    above = list(range(len(shorter) + 1))
    for row, ch_long in enumerate(longer, start=1):
        current = [row]
        for col, ch_short in enumerate(shorter, start=1):
            step_cost = 0 if ch_long == ch_short else 1
            current.append(
                min(
                    above[col] + 1,
                    current[col - 1] + 1,
                    above[col - 1] + step_cost,
                )
            )
        above = current

    return above[-1]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def find_closest_match(key: str, valid_keys: Set[str], max_distance: int = 3) -> Optional[str]:
    """
    Find the closest matching key using Levenshtein distance.

    The comparison is case-insensitive. When several valid keys are equally
    close, the one that sorts first is returned, so the suggestion does not
    depend on the iteration order of the set.

    Args:
        key: The invalid key to match
        valid_keys: Set of valid keys
        max_distance: Maximum edit distance to consider (default: 3)

    Returns:
        Closest matching key, or None if no close match found
    """
    needle = key.lower()  # loop-invariant, so lower-case it once
    best_match = None
    best_distance = max_distance + 1  # sentinel: worse than any acceptable distance

    # sorted() pins the visiting order; combined with the strict "<" below,
    # ties are resolved in favour of the first key in sorted order.
    for valid_key in sorted(valid_keys):
        distance = levenshtein_distance(needle, valid_key.lower())
        if distance < best_distance:
            best_distance = distance
            best_match = valid_key

    # Kept from the original: guards the case of a non-integer max_distance,
    # where a candidate can beat the sentinel yet still exceed the limit.
    return best_match if best_distance <= max_distance else None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def normalize_key(key: str) -> str:
    """
    Convert a config key to snake_case.

    Recognised input styles:
    - camelCase
    - PascalCase
    - kebab-case

    Args:
        key: Key to normalize

    Returns:
        Normalized key in snake_case
    """
    import re

    # (pattern, replacement) pairs, applied in the order listed.
    word_boundary_rules = (
        # lower-case letter or digit followed by a capital: fooBar -> foo_Bar
        ('([a-z0-9])([A-Z])', r'\1_\2'),
        # run of capitals followed by a capitalised word: HTTPSConnection -> HTTPS_Connection
        ('([A-Z]+)([A-Z][a-z])', r'\1_\2'),
    )

    # kebab-case separators become underscores before word boundaries are marked
    result = key.replace('-', '_')
    for pattern, replacement in word_boundary_rules:
        result = re.sub(pattern, replacement, result)

    return result.lower()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Valid configuration keys

# Keys accepted inside the 'confluence' section.
VALID_CONFLUENCE_KEYS = {
    'base_url',
    'parent_id',
    'username',
    'api_token',
    'space_key',
}

# Keys accepted inside the 'publish' section.
VALID_PUBLISH_KEYS = {
    'folder_to_publish',
    'use_file_path_as_title',
    'prepend_file_path_to_title',
    'frontmatter_from_document_start',
    'skip_metadata',
    'resolve_relative_links',
    'respect_link_dependencies',
    'auto_fix_hierarchy',
    'auto_handle_archived',
    'auto_migrate_legacy',
    'duplicate_similarity_threshold',
    'render_mermaid_diagrams',
    'process_assets',
    'ignore_patterns',
    'archive_ignored',
    'enable_sync',
    'auto_resolve_conflicts',
    'prefer_remote_on_conflict',
    'default_visibility',
}

# Common typo mappings (camelCase/kebab-case to snake_case)
# Every multi-word valid key has a camelCase and a kebab-case entry here;
# 'username' is also covered via 'userName' / 'user-name'.
COMMON_VARIATIONS = {
    'baseUrl': 'base_url',
    'base-url': 'base_url',
    'parentId': 'parent_id',
    'parent-id': 'parent_id',
    'userName': 'username',
    'user-name': 'username',
    'apiToken': 'api_token',
    'api-token': 'api_token',
    'spaceKey': 'space_key',
    'space-key': 'space_key',
    'folderToPublish': 'folder_to_publish',
    'folder-to-publish': 'folder_to_publish',
    'useFilePathAsTitle': 'use_file_path_as_title',
    'use-file-path-as-title': 'use_file_path_as_title',
    'prependFilePathToTitle': 'prepend_file_path_to_title',
    'prepend-file-path-to-title': 'prepend_file_path_to_title',
    'frontmatterFromDocumentStart': 'frontmatter_from_document_start',
    'frontmatter-from-document-start': 'frontmatter_from_document_start',
    'skipMetadata': 'skip_metadata',
    'skip-metadata': 'skip_metadata',
    'resolveRelativeLinks': 'resolve_relative_links',
    'resolve-relative-links': 'resolve_relative_links',
    'respectLinkDependencies': 'respect_link_dependencies',
    'respect-link-dependencies': 'respect_link_dependencies',
    'autoFixHierarchy': 'auto_fix_hierarchy',
    'auto-fix-hierarchy': 'auto_fix_hierarchy',
    'autoHandleArchived': 'auto_handle_archived',
    'auto-handle-archived': 'auto_handle_archived',
    'autoMigrateLegacy': 'auto_migrate_legacy',
    'auto-migrate-legacy': 'auto_migrate_legacy',
    'duplicateSimilarityThreshold': 'duplicate_similarity_threshold',
    'duplicate-similarity-threshold': 'duplicate_similarity_threshold',
    'renderMermaidDiagrams': 'render_mermaid_diagrams',
    'render-mermaid-diagrams': 'render_mermaid_diagrams',
    'processAssets': 'process_assets',
    'process-assets': 'process_assets',
    'ignorePatterns': 'ignore_patterns',
    'ignore-patterns': 'ignore_patterns',
    'archiveIgnored': 'archive_ignored',
    'archive-ignored': 'archive_ignored',
    'enableSync': 'enable_sync',
    'enable-sync': 'enable_sync',
    'autoResolveConflicts': 'auto_resolve_conflicts',
    'auto-resolve-conflicts': 'auto_resolve_conflicts',
    'preferRemoteOnConflict': 'prefer_remote_on_conflict',
    'prefer-remote-on-conflict': 'prefer_remote_on_conflict',
    'defaultVisibility': 'default_visibility',
    'default-visibility': 'default_visibility',
}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def validate_config_section(
    config: Dict[str, Any],
    valid_keys: Set[str],
    section_name: str,
    auto_correct: bool = False
) -> Tuple[Dict[str, Any], List[str], List[str]]:
    """
    Validate a configuration section.

    Keys found in ``valid_keys`` are accepted as-is. Any other key is handled
    in this order:

    1. If it is a known camelCase/kebab-case variation of a key that is valid
       for *this* section, it is renamed (``auto_correct=True``, reported as
       a warning) or reported as an error with the correct spelling.
    2. Otherwise the closest valid key by edit distance is suggested.
    3. Otherwise the full list of valid keys is reported.

    The input dictionary is not modified; a shallow copy is returned.

    Args:
        config: Configuration dictionary to validate
        valid_keys: Set of valid keys for this section
        section_name: Name of the section (for error messages)
        auto_correct: Whether to auto-correct known typos (default: False)

    Returns:
        Tuple of (corrected_config, errors, warnings)
    """
    errors: List[str] = []
    warnings: List[str] = []
    corrected = config.copy()

    # Iterate over a snapshot of the keys: auto-correction renames entries in place.
    for key in list(corrected.keys()):
        if key in valid_keys:
            continue

        # COMMON_VARIATIONS covers every section, so a hit only counts when the
        # snake_case target is actually valid here. Otherwise we would suggest
        # (or rename to) a key that is itself invalid for this section.
        correct_key = COMMON_VARIATIONS.get(key)
        if correct_key is not None and correct_key in valid_keys:
            if not auto_correct:
                errors.append(
                    f"'{section_name}.{key}': Invalid key. Did you mean '{correct_key}'? "
                    "(use snake_case instead of camelCase/kebab-case)"
                )
            elif correct_key in corrected:
                # Renaming would silently discard the value already stored
                # under the correct spelling, so report the conflict instead.
                errors.append(
                    f"'{section_name}.{key}': Conflicts with '{correct_key}', which is also set. "
                    "Remove one of them."
                )
            else:
                warnings.append(
                    f"'{section_name}.{key}': Auto-corrected to '{correct_key}' "
                    "(use snake_case instead of camelCase/kebab-case)"
                )
                corrected[correct_key] = corrected.pop(key)
            continue

        # Try to find close match using Levenshtein distance.
        # str() keeps non-string keys (e.g. integers) from crashing on .lower().
        closest = find_closest_match(str(key), valid_keys)
        if closest:
            errors.append(
                f"'{section_name}.{key}': Invalid key. Did you mean '{closest}'?"
            )
        else:
            errors.append(
                f"'{section_name}.{key}': Invalid key. Valid keys are: {', '.join(sorted(valid_keys))}"
            )

    return corrected, errors, warnings
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def validate_config_dict(
    config_data: Dict[str, Any],
    auto_correct: bool = False
) -> Tuple[Dict[str, Any], List[str], List[str]]:
    """
    Validate entire configuration dictionary.

    Checks that only the top-level keys 'confluence' and 'publish' are
    present, that each of those sections is a dictionary, and validates the
    keys of each section. Errors are reported in that order: top-level keys,
    then 'confluence', then 'publish'.

    The input is not modified. ``None`` is treated as an empty configuration,
    and any other non-dictionary value yields a single error and an empty
    corrected configuration.

    Args:
        config_data: Configuration dictionary to validate
        auto_correct: Whether to auto-correct known typos (default: False)

    Returns:
        Tuple of (corrected_config, errors, warnings)
    """
    all_errors: List[str] = []
    all_warnings: List[str] = []

    # Guard the top-level shape first: calling .copy() on None or a scalar
    # would raise AttributeError instead of producing a validation result.
    if config_data is None:
        return {}, all_errors, all_warnings
    if not isinstance(config_data, dict):
        all_errors.append(
            f"Configuration must be a dictionary, got {type(config_data).__name__}"
        )
        return {}, all_errors, all_warnings

    corrected = config_data.copy()

    # Validate top-level structure
    valid_top_keys = {'confluence', 'publish'}
    for key in corrected:
        if key in valid_top_keys:
            continue
        # str() keeps non-string keys (e.g. integers) from crashing on .lower().
        closest = find_closest_match(str(key), valid_top_keys)
        if closest:
            all_errors.append(
                f"'{key}': Invalid top-level key. Did you mean '{closest}'?"
            )
        else:
            all_errors.append(
                f"'{key}': Invalid top-level key. Valid keys are: {', '.join(sorted(valid_top_keys))}"
            )

    # Validate confluence section
    if 'confluence' in corrected:
        if not isinstance(corrected['confluence'], dict):
            all_errors.append("'confluence': Must be a dictionary")
        else:
            corrected_confluence, conf_errors, conf_warnings = validate_config_section(
                corrected['confluence'],
                VALID_CONFLUENCE_KEYS,
                'confluence',
                auto_correct
            )
            # Store the section's own copy so the caller's nested dict stays untouched.
            corrected['confluence'] = corrected_confluence
            all_errors.extend(conf_errors)
            all_warnings.extend(conf_warnings)

    # Validate publish section
    if 'publish' in corrected:
        if not isinstance(corrected['publish'], dict):
            all_errors.append("'publish': Must be a dictionary")
        else:
            corrected_publish, pub_errors, pub_warnings = validate_config_section(
                corrected['publish'],
                VALID_PUBLISH_KEYS,
                'publish',
                auto_correct
            )
            corrected['publish'] = corrected_publish
            all_errors.extend(pub_errors)
            all_warnings.extend(pub_warnings)

    return corrected, all_errors, all_warnings
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Markdown parsing module.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for parsing Markdown content into an
|
|
5
|
+
intermediate representation suitable for conversion to ADF.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from docspan.backends.confluence.markdown.ast import (
|
|
9
|
+
HeadingNode,
|
|
10
|
+
MarkdownNode,
|
|
11
|
+
ParagraphNode,
|
|
12
|
+
TextNode,
|
|
13
|
+
)
|
|
14
|
+
from docspan.backends.confluence.markdown.parser import MarkdownParser
|
|
15
|
+
|
|
16
|
+
# Names exported by a star-import of this package.
__all__ = [
    # Parser entry point
    "MarkdownParser",
    # AST node types
    "MarkdownNode", "TextNode", "HeadingNode", "ParagraphNode",
]
|