docspan 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. docspan/__init__.py +3 -0
  2. docspan/__main__.py +0 -0
  3. docspan/backends/__init__.py +19 -0
  4. docspan/backends/base.py +85 -0
  5. docspan/backends/confluence/__init__.py +0 -0
  6. docspan/backends/confluence/adf/__init__.py +14 -0
  7. docspan/backends/confluence/adf/comparator.py +427 -0
  8. docspan/backends/confluence/adf/converter.py +119 -0
  9. docspan/backends/confluence/adf/converters.py +1449 -0
  10. docspan/backends/confluence/adf/interfaces.py +191 -0
  11. docspan/backends/confluence/adf/nodes.py +2085 -0
  12. docspan/backends/confluence/adf/parser.py +400 -0
  13. docspan/backends/confluence/adf/validators.py +161 -0
  14. docspan/backends/confluence/adf/visitors.py +495 -0
  15. docspan/backends/confluence/backend.py +227 -0
  16. docspan/backends/confluence/client.py +44 -0
  17. docspan/backends/confluence/config/__init__.py +21 -0
  18. docspan/backends/confluence/config/loader.py +107 -0
  19. docspan/backends/confluence/config/models.py +167 -0
  20. docspan/backends/confluence/config/validation.py +297 -0
  21. docspan/backends/confluence/markdown/__init__.py +22 -0
  22. docspan/backends/confluence/markdown/ast.py +819 -0
  23. docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
  24. docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
  25. docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
  26. docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
  27. docspan/backends/confluence/markdown/inline_parser.py +495 -0
  28. docspan/backends/confluence/markdown/parser.py +1006 -0
  29. docspan/backends/confluence/models/__init__.py +18 -0
  30. docspan/backends/confluence/models/markdown_file.py +402 -0
  31. docspan/backends/confluence/models/page.py +212 -0
  32. docspan/backends/confluence/models/path_utils.py +34 -0
  33. docspan/backends/confluence/models/results.py +28 -0
  34. docspan/backends/confluence/models/sync_status.py +382 -0
  35. docspan/backends/confluence/services/__init__.py +0 -0
  36. docspan/backends/confluence/services/confluence/__init__.py +40 -0
  37. docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
  38. docspan/backends/confluence/services/confluence/base_client.py +420 -0
  39. docspan/backends/confluence/services/confluence/client.py +376 -0
  40. docspan/backends/confluence/services/confluence/comment_client.py +682 -0
  41. docspan/backends/confluence/services/confluence/crawler.py +587 -0
  42. docspan/backends/confluence/services/confluence/label_client.py +130 -0
  43. docspan/backends/confluence/services/confluence/page_client.py +1288 -0
  44. docspan/backends/confluence/services/confluence/space_client.py +179 -0
  45. docspan/backends/confluence/services/confluence/url_parser.py +106 -0
  46. docspan/backends/google_docs/__init__.py +0 -0
  47. docspan/backends/google_docs/auth.py +143 -0
  48. docspan/backends/google_docs/backend.py +140 -0
  49. docspan/backends/google_docs/client.py +665 -0
  50. docspan/backends/google_docs/converter.py +471 -0
  51. docspan/backends/google_docs/docs_request_builder.py +232 -0
  52. docspan/backends/google_docs/docs_structure_parser.py +120 -0
  53. docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
  54. docspan/cli/__init__.py +0 -0
  55. docspan/cli/main.py +408 -0
  56. docspan/config.py +62 -0
  57. docspan/core/__init__.py +49 -0
  58. docspan/core/merge.py +30 -0
  59. docspan/core/orchestrator.py +332 -0
  60. docspan/core/paths.py +8 -0
  61. docspan/core/state.py +53 -0
  62. docspan-0.1.0.dist-info/METADATA +273 -0
  63. docspan-0.1.0.dist-info/RECORD +65 -0
  64. docspan-0.1.0.dist-info/WHEEL +4 -0
  65. docspan-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ Configuration validation with typo detection.
3
+
4
+ Validates configuration dictionaries and suggests corrections for typos using Levenshtein distance.
5
+ """
6
+
7
+ import logging
8
+ from typing import Any, Dict, List, Optional, Set, Tuple
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Compute the Levenshtein (edit) distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions, and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at any time.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Edit distance between the strings
    """
    # Put the shorter string on the column axis so each row stays small.
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    if not shorter:
        # Turning a string into the empty string costs one edit per character.
        return len(longer)

    above = list(range(len(shorter) + 1))
    for row_no, row_char in enumerate(longer, start=1):
        row = [row_no]
        for col_no, col_char in enumerate(shorter, start=1):
            substitute = above[col_no - 1] + (0 if row_char == col_char else 1)
            insert = above[col_no] + 1
            delete = row[col_no - 1] + 1
            row.append(min(insert, delete, substitute))
        above = row

    return above[-1]
42
+
43
+
44
def find_closest_match(key: str, valid_keys: Set[str], max_distance: int = 3) -> Optional[str]:
    """
    Find the closest matching key using Levenshtein distance.

    Comparison is case-insensitive. When several valid keys are equally
    close, the alphabetically first one is returned, so the suggestion is
    the same on every run.

    Args:
        key: The invalid key to match
        valid_keys: Set of valid keys
        max_distance: Maximum edit distance to consider (default: 3)

    Returns:
        Closest matching key, or None if no key is within max_distance
    """
    best_match = None
    best_distance = max_distance + 1
    lowered_key = key.lower()  # loop-invariant, so compute it once

    # A set of strings iterates in hash order, which can change between
    # interpreter runs; sorting makes tie-breaking deterministic.
    for valid_key in sorted(valid_keys):
        distance = levenshtein_distance(lowered_key, valid_key.lower())
        # Strict '<' keeps the first (alphabetically smallest) key on ties.
        if distance < best_distance:
            best_distance = distance
            best_match = valid_key

    # best_match is only ever assigned when distance <= max_distance, so it
    # is still None when nothing was close enough.
    return best_match
66
+
67
+
68
def normalize_key(key: str) -> str:
    """
    Convert a config key to snake_case.

    Recognised input spellings:
    - camelCase
    - PascalCase
    - kebab-case

    Args:
        key: Key to normalize

    Returns:
        The key rewritten in lower-case snake_case
    """
    import re

    # Applied in order; each inserts an underscore at one kind of word boundary.
    boundary_rules = (
        # lower-case letter or digit followed by a capital (camelCase)
        (r'([a-z0-9])([A-Z])', r'\1_\2'),
        # run of capitals followed by a capitalised word
        # (HTTPSConnection -> HTTPS_Connection)
        (r'([A-Z]+)([A-Z][a-z])', r'\1_\2'),
    )

    # kebab-case separators become underscores before boundaries are detected
    normalized = key.replace('-', '_')
    for pattern, replacement in boundary_rules:
        normalized = re.sub(pattern, replacement, normalized)

    return normalized.lower()
95
+
96
+
97
# Valid configuration keys for the 'confluence' section
VALID_CONFLUENCE_KEYS = {
    'base_url',
    'parent_id',
    'username',
    'api_token',
    'space_key',
}

# Valid configuration keys for the 'publish' section
VALID_PUBLISH_KEYS = {
    'folder_to_publish',
    'use_file_path_as_title',
    'prepend_file_path_to_title',
    'frontmatter_from_document_start',
    'skip_metadata',
    'resolve_relative_links',
    'respect_link_dependencies',
    'auto_fix_hierarchy',
    'auto_handle_archived',
    'auto_migrate_legacy',
    'duplicate_similarity_threshold',
    'render_mermaid_diagrams',
    'process_assets',
    'ignore_patterns',
    'archive_ignored',
    'enable_sync',
    'auto_resolve_conflicts',
    'prefer_remote_on_conflict',
    'default_visibility',
}

# Common typo mappings (camelCase/kebab-case to snake_case).
# The table is shared by both sections: every valid key above has one
# camelCase and one kebab-case entry here.
COMMON_VARIATIONS = {
    # confluence section
    'baseUrl': 'base_url',
    'base-url': 'base_url',
    'parentId': 'parent_id',
    'parent-id': 'parent_id',
    'userName': 'username',
    'user-name': 'username',
    'apiToken': 'api_token',
    'api-token': 'api_token',
    'spaceKey': 'space_key',
    'space-key': 'space_key',
    # publish section
    'folderToPublish': 'folder_to_publish',
    'folder-to-publish': 'folder_to_publish',
    'useFilePathAsTitle': 'use_file_path_as_title',
    'use-file-path-as-title': 'use_file_path_as_title',
    'prependFilePathToTitle': 'prepend_file_path_to_title',
    'prepend-file-path-to-title': 'prepend_file_path_to_title',
    'frontmatterFromDocumentStart': 'frontmatter_from_document_start',
    'frontmatter-from-document-start': 'frontmatter_from_document_start',
    'skipMetadata': 'skip_metadata',
    'skip-metadata': 'skip_metadata',
    'resolveRelativeLinks': 'resolve_relative_links',
    'resolve-relative-links': 'resolve_relative_links',
    'respectLinkDependencies': 'respect_link_dependencies',
    'respect-link-dependencies': 'respect_link_dependencies',
    'autoFixHierarchy': 'auto_fix_hierarchy',
    'auto-fix-hierarchy': 'auto_fix_hierarchy',
    'autoHandleArchived': 'auto_handle_archived',
    'auto-handle-archived': 'auto_handle_archived',
    'autoMigrateLegacy': 'auto_migrate_legacy',
    'auto-migrate-legacy': 'auto_migrate_legacy',
    'duplicateSimilarityThreshold': 'duplicate_similarity_threshold',
    'duplicate-similarity-threshold': 'duplicate_similarity_threshold',
    'renderMermaidDiagrams': 'render_mermaid_diagrams',
    'render-mermaid-diagrams': 'render_mermaid_diagrams',
    'processAssets': 'process_assets',
    'process-assets': 'process_assets',
    'ignorePatterns': 'ignore_patterns',
    'ignore-patterns': 'ignore_patterns',
    'archiveIgnored': 'archive_ignored',
    'archive-ignored': 'archive_ignored',
    'enableSync': 'enable_sync',
    'enable-sync': 'enable_sync',
    'autoResolveConflicts': 'auto_resolve_conflicts',
    'auto-resolve-conflicts': 'auto_resolve_conflicts',
    'preferRemoteOnConflict': 'prefer_remote_on_conflict',
    'prefer-remote-on-conflict': 'prefer_remote_on_conflict',
    'defaultVisibility': 'default_visibility',
    'default-visibility': 'default_visibility',
}
177
+
178
+
179
def validate_config_section(
    config: Dict[str, Any],
    valid_keys: Set[str],
    section_name: str,
    auto_correct: bool = False
) -> Tuple[Dict[str, Any], List[str], List[str]]:
    """
    Validate a configuration section.

    Each key that is not in valid_keys is reported. A key that is a known
    camelCase/kebab-case spelling of a key valid in THIS section is either
    renamed (auto_correct=True, with a warning) or reported with the
    canonical spelling as a suggestion. Any other unknown key is reported
    with the closest valid key, or with the full list of valid keys when
    nothing is close.

    The input dictionary is not modified; a shallow copy is returned.

    Args:
        config: Configuration dictionary to validate
        valid_keys: Set of valid keys for this section
        section_name: Name of the section (for error messages)
        auto_correct: Whether to auto-correct known typos (default: False)

    Returns:
        Tuple of (corrected_config, errors, warnings)
    """
    errors: List[str] = []
    warnings: List[str] = []
    corrected = config.copy()

    # Iterate over a snapshot: auto-correction renames entries in 'corrected'.
    for key in list(corrected.keys()):
        if key in valid_keys:
            continue

        # COMMON_VARIATIONS is shared by every section, so a mapping is only
        # usable when its target is valid here. Otherwise a key such as
        # 'baseUrl' in the wrong section would be renamed to a key that is
        # itself invalid and would escape validation.
        correct_key = COMMON_VARIATIONS.get(key)
        if correct_key in valid_keys:
            if not auto_correct:
                errors.append(
                    f"'{section_name}.{key}': Invalid key. Did you mean '{correct_key}'? "
                    f"(use snake_case instead of camelCase/kebab-case)"
                )
            elif correct_key in corrected:
                # Renaming would silently overwrite a value the user already
                # set under the canonical key (or under another variation
                # corrected earlier in this loop); report it instead.
                errors.append(
                    f"'{section_name}.{key}': Conflicts with '{correct_key}', "
                    f"which is already set. Remove one of them."
                )
            else:
                warnings.append(
                    f"'{section_name}.{key}': Auto-corrected to '{correct_key}' "
                    f"(use snake_case instead of camelCase/kebab-case)"
                )
                corrected[correct_key] = corrected.pop(key)
            continue

        # Try to find close match using Levenshtein distance
        closest = find_closest_match(key, valid_keys)
        if closest:
            errors.append(
                f"'{section_name}.{key}': Invalid key. Did you mean '{closest}'?"
            )
        else:
            errors.append(
                f"'{section_name}.{key}': Invalid key. Valid keys are: {', '.join(sorted(valid_keys))}"
            )

    return corrected, errors, warnings
233
+
234
+
235
def validate_config_dict(
    config_data: Dict[str, Any],
    auto_correct: bool = False
) -> Tuple[Dict[str, Any], List[str], List[str]]:
    """
    Validate a whole configuration dictionary.

    Unknown top-level keys are reported first. The 'confluence' section is
    then checked, followed by the 'publish' section; a section that is
    present but is not a dictionary is reported and not inspected further.
    The input dictionary is not modified; a shallow copy is returned with
    each validated section replaced by its corrected copy.

    Args:
        config_data: Configuration dictionary to validate
        auto_correct: Whether to auto-correct known typos (default: False)

    Returns:
        Tuple of (corrected_config, errors, warnings)
    """
    # Sections in the order they are validated, each with its allowed keys.
    sections = (
        ('confluence', VALID_CONFLUENCE_KEYS),
        ('publish', VALID_PUBLISH_KEYS),
    )
    valid_top_keys = {'confluence', 'publish'}

    result = config_data.copy()
    problems = []
    notices = []

    # Top-level structure
    for top_key in list(result.keys()):
        if top_key in valid_top_keys:
            continue
        suggestion = find_closest_match(top_key, valid_top_keys)
        if suggestion:
            problems.append(
                f"'{top_key}': Invalid top-level key. Did you mean '{suggestion}'?"
            )
        else:
            problems.append(
                f"'{top_key}': Invalid top-level key. Valid keys are: {', '.join(sorted(valid_top_keys))}"
            )

    # Per-section validation
    for name, allowed in sections:
        if name not in result:
            continue

        section = result[name]
        if not isinstance(section, dict):
            problems.append(f"'{name}': Must be a dictionary")
            continue

        fixed, section_errors, section_warnings = validate_config_section(
            section,
            allowed,
            name,
            auto_correct
        )
        result[name] = fixed
        problems.extend(section_errors)
        notices.extend(section_warnings)

    return result, problems, notices
@@ -0,0 +1,22 @@
1
+ """
2
+ Markdown parsing module.
3
+
4
+ This module provides functionality for parsing Markdown content into an
5
+ intermediate representation suitable for conversion to ADF.
6
+ """
7
+
8
+ from docspan.backends.confluence.markdown.ast import (
9
+ HeadingNode,
10
+ MarkdownNode,
11
+ ParagraphNode,
12
+ TextNode,
13
+ )
14
+ from docspan.backends.confluence.markdown.parser import MarkdownParser
15
+
16
+ __all__ = [
17
+ "MarkdownParser",
18
+ "MarkdownNode",
19
+ "TextNode",
20
+ "HeadingNode",
21
+ "ParagraphNode",
22
+ ]