docspan 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. docspan/__init__.py +3 -0
  2. docspan/__main__.py +0 -0
  3. docspan/backends/__init__.py +19 -0
  4. docspan/backends/base.py +85 -0
  5. docspan/backends/confluence/__init__.py +0 -0
  6. docspan/backends/confluence/adf/__init__.py +14 -0
  7. docspan/backends/confluence/adf/comparator.py +427 -0
  8. docspan/backends/confluence/adf/converter.py +119 -0
  9. docspan/backends/confluence/adf/converters.py +1449 -0
  10. docspan/backends/confluence/adf/interfaces.py +191 -0
  11. docspan/backends/confluence/adf/nodes.py +2085 -0
  12. docspan/backends/confluence/adf/parser.py +400 -0
  13. docspan/backends/confluence/adf/validators.py +161 -0
  14. docspan/backends/confluence/adf/visitors.py +495 -0
  15. docspan/backends/confluence/backend.py +227 -0
  16. docspan/backends/confluence/client.py +44 -0
  17. docspan/backends/confluence/config/__init__.py +21 -0
  18. docspan/backends/confluence/config/loader.py +107 -0
  19. docspan/backends/confluence/config/models.py +167 -0
  20. docspan/backends/confluence/config/validation.py +297 -0
  21. docspan/backends/confluence/markdown/__init__.py +22 -0
  22. docspan/backends/confluence/markdown/ast.py +819 -0
  23. docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
  24. docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
  25. docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
  26. docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
  27. docspan/backends/confluence/markdown/inline_parser.py +495 -0
  28. docspan/backends/confluence/markdown/parser.py +1006 -0
  29. docspan/backends/confluence/models/__init__.py +18 -0
  30. docspan/backends/confluence/models/markdown_file.py +402 -0
  31. docspan/backends/confluence/models/page.py +212 -0
  32. docspan/backends/confluence/models/path_utils.py +34 -0
  33. docspan/backends/confluence/models/results.py +28 -0
  34. docspan/backends/confluence/models/sync_status.py +382 -0
  35. docspan/backends/confluence/services/__init__.py +0 -0
  36. docspan/backends/confluence/services/confluence/__init__.py +40 -0
  37. docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
  38. docspan/backends/confluence/services/confluence/base_client.py +420 -0
  39. docspan/backends/confluence/services/confluence/client.py +376 -0
  40. docspan/backends/confluence/services/confluence/comment_client.py +682 -0
  41. docspan/backends/confluence/services/confluence/crawler.py +587 -0
  42. docspan/backends/confluence/services/confluence/label_client.py +130 -0
  43. docspan/backends/confluence/services/confluence/page_client.py +1288 -0
  44. docspan/backends/confluence/services/confluence/space_client.py +179 -0
  45. docspan/backends/confluence/services/confluence/url_parser.py +106 -0
  46. docspan/backends/google_docs/__init__.py +0 -0
  47. docspan/backends/google_docs/auth.py +143 -0
  48. docspan/backends/google_docs/backend.py +140 -0
  49. docspan/backends/google_docs/client.py +665 -0
  50. docspan/backends/google_docs/converter.py +471 -0
  51. docspan/backends/google_docs/docs_request_builder.py +232 -0
  52. docspan/backends/google_docs/docs_structure_parser.py +120 -0
  53. docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
  54. docspan/cli/__init__.py +0 -0
  55. docspan/cli/main.py +408 -0
  56. docspan/config.py +62 -0
  57. docspan/core/__init__.py +49 -0
  58. docspan/core/merge.py +30 -0
  59. docspan/core/orchestrator.py +332 -0
  60. docspan/core/paths.py +8 -0
  61. docspan/core/state.py +53 -0
  62. docspan-0.1.0.dist-info/METADATA +273 -0
  63. docspan-0.1.0.dist-info/RECORD +65 -0
  64. docspan-0.1.0.dist-info/WHEEL +4 -0
  65. docspan-0.1.0.dist-info/entry_points.txt +2 -0
docspan/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """docspan — push and pull markdown to Google Docs and Confluence."""
2
+
3
+ __version__ = "0.1.0"
docspan/__main__.py ADDED
File without changes
@@ -0,0 +1,19 @@
1
+ """Backend registry — maps backend names to their classes."""
2
+
3
+ from docspan.backends.base import Backend, PullResult, PushResult, RemoteDoc, SyncDirection
4
+ from docspan.backends.confluence.backend import ConfluenceBackend
5
+ from docspan.backends.google_docs.backend import GoogleDocsBackend
6
+
7
# Registry mapping each backend name to its Backend subclass.
BACKENDS: dict[str, type[Backend]] = {
    "google_docs": GoogleDocsBackend,
    "confluence": ConfluenceBackend,
}

# Names exported by this package module.
__all__ = [
    "Backend",
    "SyncDirection",
    "RemoteDoc",
    "PushResult",
    "PullResult",
    "BACKENDS",
]
@@ -0,0 +1,85 @@
1
+ """Abstract backend interface. Every platform adapter implements this."""
2
+
3
+ import inspect
4
+ from abc import ABC, abstractmethod
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import Literal, Optional
8
+
9
+
10
class SyncDirection(str, Enum):
    """Direction in which a document is synchronised.

    Members also derive from ``str``, so each one compares equal to its
    plain string value.
    """

    # local markdown → remote doc
    PUSH = "push"
    # remote doc → local markdown
    PULL = "pull"
    # both directions
    BOTH = "both"
14
+
15
+
16
@dataclass
class RemoteDoc:
    """A document retrieved from a remote platform.

    ``doc_id``, ``title`` and ``content_markdown`` are required;
    ``last_modified`` and ``url`` default to ``None`` when not supplied.
    """

    doc_id: str
    title: str
    content_markdown: str
    last_modified: Optional[str] = None
    url: Optional[str] = None
24
+
25
+
26
@dataclass
class PushResult:
    """Outcome of a push for a single document.

    ``status`` is one of ``"ok"``, ``"conflict"``, ``"error"`` or
    ``"skipped"``; ``message`` and ``url`` are optional extras.
    """

    status: Literal["ok", "conflict", "error", "skipped"]
    doc_id: str
    message: Optional[str] = None
    url: Optional[str] = None
32
+
33
+
34
@dataclass
class PullResult:
    """Outcome of a pull for a single document.

    ``status`` is one of ``"ok"``, ``"conflict"``, ``"error"`` or
    ``"skipped"``; ``local_path`` is required and ``message`` is optional.
    """

    status: Literal["ok", "conflict", "error", "skipped"]
    doc_id: str
    local_path: str
    message: Optional[str] = None
40
+
41
+
42
class Backend(ABC):
    """
    Base class for all docspan platform backends.

    Implementing a new backend:
        1. Subclass Backend
        2. Set class attribute `name` (e.g. name = "my_backend")
        3. Implement every abstract method: push(), pull(), auth_setup(),
           get_remote_version(), and validate_config()
        4. Register in src/docspan/backends/__init__.py

    A concrete subclass that does not define `name` in its own class body is
    rejected with TypeError when the class is created; subclasses that are
    still abstract are exempt from that check.
    """

    name: str  # must be overridden in every concrete subclass

    def __init_subclass__(cls, **kwargs: object) -> None:
        """Reject concrete subclasses that do not declare their own `name`.

        Raises:
            TypeError: if `cls` is not abstract and `name` is missing from
                its own class dictionary (an inherited `name` does not count).
        """
        super().__init_subclass__(**kwargs)
        if inspect.isabstract(cls):
            # Intermediate abstract bases do not need a name.
            return
        if "name" not in cls.__dict__:
            raise TypeError(
                f"{cls.__name__} must define a 'name' class attribute (e.g. name = 'my_backend')"
            )

    @abstractmethod
    def push(self, local_path: str, doc_id: str, **kwargs) -> PushResult:
        """Convert local markdown file and update the remote document."""

    @abstractmethod
    def pull(self, doc_id: str, local_path: str, **kwargs) -> PullResult:
        """Fetch the remote document and write it as local markdown."""

    @abstractmethod
    def auth_setup(self) -> None:
        """Interactive / instructional setup wizard for this backend."""

    @abstractmethod
    def get_remote_version(self, doc_id: str) -> str:
        """
        Return an opaque version token for the current remote document state.
        - Google Docs: returns doc['revisionId'] (opaque string)
        - Confluence: returns str(page['version']['number']) (monotonic integer as string)
        Used by == comparison to detect remote changes between syncs.
        """

    @abstractmethod
    def validate_config(self) -> None:
        """Raise ValueError with a clear message if config is missing required keys."""
File without changes
@@ -0,0 +1,14 @@
1
+ """
2
+ Atlassian Document Format (ADF) conversion module.
3
+
4
+ This module provides components for converting Markdown AST to
5
+ Atlassian Document Format (ADF).
6
+ """
7
+
8
+ from docspan.backends.confluence.adf.converter import AdfConverter
9
+ from docspan.backends.confluence.adf.nodes import AdfNode
10
+
11
+ __all__ = [
12
+ "AdfConverter",
13
+ "AdfNode",
14
+ ]
@@ -0,0 +1,427 @@
1
+ """
2
+ Markdown-to-ADF comparison tool.
3
+
4
+ This module provides functionality to compare markdown input with generated ADF output
5
+ to identify discrepancies and issues in the conversion process.
6
+ """
7
+
8
+ import difflib
9
+ from dataclasses import dataclass, field
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from docspan.backends.confluence.adf.converter import AdfConverter
15
+ from docspan.backends.confluence.adf.parser import AdfDocument, AdfNodeType, AdfParser
16
+ from docspan.backends.confluence.markdown.ast import MarkdownNode
17
+ from docspan.backends.confluence.markdown.parser import MarkdownParser
18
+
19
+
20
class DifferenceType(Enum):
    """Categories of discrepancy a comparison can report."""

    # Content-level differences.
    MISSING_CONTENT = "missing_content"
    EXTRA_CONTENT = "extra_content"

    # Mismatch categories.
    STRUCTURE_MISMATCH = "structure_mismatch"
    TEXT_MISMATCH = "text_mismatch"
    ATTRIBUTE_MISMATCH = "attribute_mismatch"
    LINK_MISMATCH = "link_mismatch"
    FORMAT_MISMATCH = "format_mismatch"
30
+
31
+
32
@dataclass
class Difference:
    """A single discrepancy found between markdown and ADF."""

    type: DifferenceType
    location: str
    expected: Any
    actual: Any
    severity: str = "medium"  # low, medium, high
    description: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of this difference.

        The type is reduced to its enum value, and ``expected`` / ``actual``
        are passed through ``str()``.
        """
        payload: Dict[str, Any] = {}
        payload["type"] = self.type.value
        payload["location"] = self.location
        payload["expected"] = str(self.expected)
        payload["actual"] = str(self.actual)
        payload["severity"] = self.severity
        payload["description"] = self.description
        return payload
53
+
54
+
55
@dataclass
class ComparisonReport:
    """Accumulated result of comparing markdown with ADF.

    A fresh report starts as successful with no differences; recording any
    difference marks it unsuccessful.
    """

    markdown_file: Optional[Path] = None
    total_differences: int = 0
    differences: List[Difference] = field(default_factory=list)
    markdown_stats: Dict[str, Any] = field(default_factory=dict)
    adf_stats: Dict[str, Any] = field(default_factory=dict)
    success: bool = True

    def add_difference(
        self,
        diff_type: DifferenceType,
        location: str,
        expected: Any,
        actual: Any,
        severity: str = "medium",
        description: Optional[str] = None,
    ) -> None:
        """Record one difference, bump the counter and clear ``success``."""
        entry = Difference(
            type=diff_type,
            location=location,
            expected=expected,
            actual=actual,
            severity=severity,
            description=description,
        )
        self.success = False
        self.total_differences += 1
        self.differences.append(entry)

    def get_differences_by_type(self, diff_type: DifferenceType) -> List[Difference]:
        """Return the recorded differences whose type equals ``diff_type``."""
        matching = []
        for entry in self.differences:
            if entry.type == diff_type:
                matching.append(entry)
        return matching

    def get_differences_by_severity(self, severity: str) -> List[Difference]:
        """Return the recorded differences whose severity equals ``severity``."""
        matching = []
        for entry in self.differences:
            if entry.severity == severity:
                matching.append(entry)
        return matching

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of the report, including per-type and
        per-severity counts."""
        source = None if not self.markdown_file else str(self.markdown_file)
        type_counts = {
            kind.value: len(self.get_differences_by_type(kind))
            for kind in DifferenceType
        }
        severity_counts = {
            level: len(self.get_differences_by_severity(level))
            for level in ["low", "medium", "high"]
        }
        return {
            "markdown_file": source,
            "success": self.success,
            "total_differences": self.total_differences,
            "differences": [entry.to_dict() for entry in self.differences],
            "differences_by_type": type_counts,
            "differences_by_severity": severity_counts,
            "markdown_stats": self.markdown_stats,
            "adf_stats": self.adf_stats,
        }
114
+
115
+
116
class MarkdownAdfComparator:
    """
    Compare markdown input with generated ADF output.

    This class provides methods to identify discrepancies between markdown
    source and the ADF that was generated from it.
    """

    def __init__(self) -> None:
        """Initialize the comparator."""
        self.markdown_parser = MarkdownParser()
        self.adf_converter = AdfConverter()
        self.adf_parser = AdfParser()

    def compare_from_markdown(
        self,
        markdown_content: str,
        markdown_file: Optional[Path] = None,
    ) -> ComparisonReport:
        """
        Compare markdown content with its generated ADF.

        The markdown is parsed, converted to ADF, and the ADF is parsed back;
        structure, text and links are then compared. Any exception raised
        along the way is recorded in the report as a high-severity difference
        rather than propagated.

        Args:
            markdown_content: Markdown content to convert and compare
            markdown_file: Optional path to the markdown file

        Returns:
            Comparison report
        """
        report = ComparisonReport(markdown_file=markdown_file)

        try:
            md_nodes = self.markdown_parser.parse(markdown_content)
            adf_dict = self.adf_converter.convert(md_nodes)
            adf_doc = self.adf_parser.parse(adf_dict)

            report.markdown_stats = self._get_markdown_stats(md_nodes)
            report.adf_stats = adf_doc.get_node_statistics()

            self._compare_structure(md_nodes, adf_doc, report)
            self._compare_text_content(md_nodes, adf_doc, report)
            self._compare_links(md_nodes, adf_doc, report)

        except Exception as e:
            # Deliberate catch-all: a failed conversion is itself a finding.
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                "root",
                "valid conversion",
                f"conversion failed: {e}",
                severity="high",
                description=str(e),
            )

        return report

    def compare_with_existing_adf(
        self,
        markdown_content: str,
        existing_adf: Dict[str, Any],
        markdown_file: Optional[Path] = None,
    ) -> ComparisonReport:
        """
        Compare markdown with an existing ADF document.

        The markdown is converted to ADF and that generated document is
        compared against ``existing_adf``. Any exception raised along the way
        is recorded in the report as a high-severity difference rather than
        propagated.

        Args:
            markdown_content: Markdown source content
            existing_adf: Existing ADF document (from Confluence)
            markdown_file: Optional path to the markdown file

        Returns:
            Comparison report
        """
        report = ComparisonReport(markdown_file=markdown_file)

        try:
            md_nodes = self.markdown_parser.parse(markdown_content)
            generated_adf_dict = self.adf_converter.convert(md_nodes)

            generated_adf = self.adf_parser.parse(generated_adf_dict)
            existing_adf_doc = self.adf_parser.parse(existing_adf)

            report.markdown_stats = self._get_markdown_stats(md_nodes)
            report.adf_stats = {
                "generated": generated_adf.get_node_statistics(),
                "existing": existing_adf_doc.get_node_statistics(),
            }

            self._compare_adf_documents(generated_adf, existing_adf_doc, report)

        except Exception as e:
            # Deliberate catch-all: a failed comparison is itself a finding.
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                "root",
                "successful comparison",
                f"comparison failed: {e}",
                severity="high",
                description=str(e),
            )

        return report

    def _compare_structure(
        self,
        md_nodes: List[MarkdownNode],
        adf_doc: AdfDocument,
        report: ComparisonReport,
    ) -> None:
        """Compare heading, list and code-block counts of markdown and ADF."""
        # Flatten once and reuse; the tree does not change between counts.
        flat_nodes = self._flatten_md_nodes(md_nodes)
        md_headings = sum(1 for n in flat_nodes if n.type == "heading")
        md_lists = sum(1 for n in flat_nodes if n.type in ["bulletList", "orderedList"])
        md_code_blocks = sum(1 for n in flat_nodes if n.type == "codeBlock")

        root = adf_doc.root
        adf_headings = len(root.find_nodes_by_type(AdfNodeType.HEADING))
        adf_lists = len(root.find_nodes_by_type(AdfNodeType.BULLET_LIST)) + len(
            root.find_nodes_by_type(AdfNodeType.ORDERED_LIST)
        )
        adf_code_blocks = len(root.find_nodes_by_type(AdfNodeType.CODE_BLOCK))

        if md_headings != adf_headings:
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                "headings",
                md_headings,
                adf_headings,
                severity="high",
                description=f"Heading count mismatch: {md_headings} in markdown vs {adf_headings} in ADF",
            )

        if md_lists != adf_lists:
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                "lists",
                md_lists,
                adf_lists,
                severity="medium",
                description=f"List count mismatch: {md_lists} in markdown vs {adf_lists} in ADF",
            )

        if md_code_blocks != adf_code_blocks:
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                "code_blocks",
                md_code_blocks,
                adf_code_blocks,
                severity="medium",
                description=f"Code block count mismatch: {md_code_blocks} in markdown vs {adf_code_blocks} in ADF",
            )

    def _compare_text_content(
        self,
        md_nodes: List[MarkdownNode],
        adf_doc: AdfDocument,
        report: ComparisonReport,
    ) -> None:
        """Compare whitespace-normalized text of markdown and ADF.

        A difference is recorded when the similarity ratio is below 0.95; it
        is "high" severity below 0.8 and "medium" otherwise.
        """
        md_text = self._extract_markdown_text(md_nodes)
        adf_text = adf_doc.get_all_text()

        # Collapse all whitespace runs so layout differences are ignored.
        md_text_normalized = " ".join(md_text.split())
        adf_text_normalized = " ".join(adf_text.split())

        similarity = difflib.SequenceMatcher(None, md_text_normalized, adf_text_normalized).ratio()

        if similarity < 0.95:  # Less than 95% similar
            report.add_difference(
                DifferenceType.TEXT_MISMATCH,
                "text_content",
                "similarity: 100%",
                f"similarity: {similarity * 100:.1f}%",
                severity="high" if similarity < 0.8 else "medium",
                description=f"Text content similarity is {similarity * 100:.1f}%",
            )

    def _compare_links(
        self,
        md_nodes: List[MarkdownNode],
        adf_doc: AdfDocument,
        report: ComparisonReport,
    ) -> None:
        """Compare the number of links in markdown and ADF (counts only)."""
        md_links = self._extract_markdown_links(md_nodes)
        adf_links = adf_doc.find_links()

        if len(md_links) != len(adf_links):
            report.add_difference(
                DifferenceType.LINK_MISMATCH,
                "links",
                len(md_links),
                len(adf_links),
                severity="medium",
                description=f"Link count mismatch: {len(md_links)} in markdown vs {len(adf_links)} in ADF",
            )

    @staticmethod
    def _node_count_mismatches(
        gen_stats: Dict[str, Any],
        exist_stats: Dict[str, Any],
    ) -> List[tuple]:
        """Return ``(node_type, generated_count, existing_count)`` for every
        node type whose counts differ.

        Types from ``gen_stats`` come first, in their original order, followed
        by types that appear only in ``exist_stats`` (reported with a
        generated count of 0).
        """
        mismatches = []
        for node_type, count in gen_stats.items():
            existing_count = exist_stats.get(node_type, 0)
            if count != existing_count:
                mismatches.append((node_type, count, existing_count))
        # Types the generated document lacks entirely are mismatches too.
        for node_type, existing_count in exist_stats.items():
            if node_type not in gen_stats and existing_count != 0:
                mismatches.append((node_type, 0, existing_count))
        return mismatches

    def _compare_adf_documents(
        self,
        generated: AdfDocument,
        existing: AdfDocument,
        report: ComparisonReport,
    ) -> None:
        """Compare two ADF documents by node statistics and full text.

        Node types found in either document are checked, so content that
        exists only in ``existing`` is reported as well.
        """
        gen_stats = generated.get_node_statistics()
        exist_stats = existing.get_node_statistics()

        for node_type, count, existing_count in self._node_count_mismatches(gen_stats, exist_stats):
            report.add_difference(
                DifferenceType.STRUCTURE_MISMATCH,
                f"node_count.{node_type}",
                count,
                existing_count,
                severity="medium",
                description=f"Node count for {node_type}: {count} generated vs {existing_count} existing",
            )

        gen_text = generated.get_all_text()
        exist_text = existing.get_all_text()

        if gen_text != exist_text:
            similarity = difflib.SequenceMatcher(None, gen_text, exist_text).ratio()
            report.add_difference(
                DifferenceType.TEXT_MISMATCH,
                "text_content",
                f"generated text ({len(gen_text)} chars)",
                f"existing text ({len(exist_text)} chars)",
                severity="high" if similarity < 0.8 else "medium",
                description=f"Text similarity: {similarity * 100:.1f}%",
            )

    def _get_markdown_stats(self, nodes: List[MarkdownNode]) -> Dict[str, Any]:
        """Return the total node count and a per-type count for the tree."""
        all_nodes = self._flatten_md_nodes(nodes)
        node_types: Dict[Any, int] = {}

        for node in all_nodes:
            # node.type is used directly as the dictionary key.
            node_types[node.type] = node_types.get(node.type, 0) + 1

        return {
            "total_nodes": len(all_nodes),
            "node_types": node_types,
        }

    def _flatten_md_nodes(self, nodes: List[MarkdownNode]) -> List[MarkdownNode]:
        """Flatten the markdown node tree into a list, parents before children."""
        result = []

        for node in nodes:
            result.append(node)
            children = getattr(node, "children", None)
            if children:
                result.extend(self._flatten_md_nodes(children))

        return result

    def _extract_markdown_text(self, nodes: List[MarkdownNode]) -> str:
        """Return the space-joined ``content`` of all nodes, depth first."""
        text_parts = []

        for node in nodes:
            content = getattr(node, "content", None)
            if content:
                text_parts.append(content)

            children = getattr(node, "children", None)
            if children:
                text_parts.append(self._extract_markdown_text(children))

        return " ".join(text_parts)

    def _extract_markdown_links(self, nodes: List[MarkdownNode]) -> List[Dict[str, Any]]:
        """Return url/title/text dicts for every node whose type is "link".

        Attributes a link node does not have are reported as ``None``.
        """
        return [
            {
                "url": getattr(node, "url", None),
                "title": getattr(node, "title", None),
                "text": getattr(node, "content", None),
            }
            for node in self._flatten_md_nodes(nodes)
            if node.type == "link"
        ]
@@ -0,0 +1,119 @@
1
+ """
2
+ Converter for transforming Markdown AST to ADF.
3
+
4
+ This module provides the main entry point for converting Markdown AST to
5
+ Atlassian Document Format (ADF).
6
+ """
7
+
8
+ from typing import Any, Dict, List
9
+
10
+ from docspan.backends.confluence.adf.converters import ConverterFactory
11
+ from docspan.backends.confluence.adf.nodes import AdfBuilder
12
+ from docspan.backends.confluence.markdown.ast import MarkdownNode
13
+
14
+
15
class AdfConverter:
    """
    Convert Markdown AST to Atlassian Document Format (ADF).

    This class serves as a facade for the ADF conversion system, maintaining
    backward compatibility with existing code while delegating to the new
    visitor-based conversion system.
    """

    def __init__(self) -> None:
        """Initialize the converter."""
        self.builder = AdfBuilder()
        self.converter = ConverterFactory.create_converter()

    def convert(self, nodes: List[MarkdownNode], title: str = None, skip_first_h1_matching_title: bool = False) -> Dict[str, Any]:
        """
        Convert Markdown nodes to ADF.

        Args:
            nodes: List of Markdown AST nodes
            title: Optional page title to compare against first H1 (may be None)
            skip_first_h1_matching_title: If True, skip the first H1 heading that matches the title

        Returns:
            ADF document as a dictionary. When any top-level node carries a
            non-empty ``storage_format_html`` attribute, the dictionary also
            gets a ``'storage_format_html'`` list holding every such value in
            node order.
        """
        # Filter nodes to skip first H1 if it matches the title
        filtered_nodes = self._filter_duplicate_title_heading(nodes, title, skip_first_h1_matching_title)

        # Get the standard ADF document
        adf = self.converter.convert(filtered_nodes)

        # HACK: raw HTML (e.g. iframes) that ADF cannot express is carried
        # alongside the document under an extra key.
        for node in filtered_nodes:
            html = getattr(node, 'storage_format_html', None)
            if html:
                # The document is a dict, so the key must be checked with
                # dict semantics; hasattr() never sees dict keys and would
                # reset the list on every node.
                adf.setdefault('storage_format_html', []).append(html)

        return adf

    def _filter_duplicate_title_heading(
        self,
        nodes: List[MarkdownNode],
        title: str = None,
        skip_first_h1: bool = False
    ) -> List[MarkdownNode]:
        """
        Filter out the first H1 heading if it matches the page title.

        This prevents duplicate titles when the title is extracted from the first H1
        and then that same H1 appears in the content.

        Args:
            nodes: List of Markdown AST nodes
            title: The page title to compare against
            skip_first_h1: Whether to perform filtering

        Returns:
            Filtered list of nodes (the input list itself when nothing is removed)
        """
        if not skip_first_h1 or not title or not nodes:
            return nodes

        from docspan.backends.confluence.markdown.ast import HeadingNode

        for i, node in enumerate(nodes):
            if isinstance(node, HeadingNode) and node.level == 1:
                # Extract text from the heading's children
                heading_text = self._extract_text_from_nodes(node.children)

                # If it matches the title, skip this node
                if heading_text.strip() == title.strip():
                    return nodes[:i] + nodes[i+1:]

                # Only the first H1 is considered; a non-matching one ends the search
                break

        return nodes

    def _extract_text_from_nodes(self, nodes: List[MarkdownNode]) -> str:
        """
        Extract plain text from a list of nodes (recursively).

        Args:
            nodes: List of nodes to extract text from; None or an empty list
                yields an empty string

        Returns:
            Concatenated text content
        """
        from docspan.backends.confluence.markdown.ast import TextNode

        text_parts = []
        for node in nodes or ():
            if isinstance(node, TextNode):
                # Tolerate a text node with no content.
                text_parts.append(node.content or '')
                continue
            children = getattr(node, 'children', None)
            if children:
                # Only recurse into real child lists; children may be None.
                text_parts.append(self._extract_text_from_nodes(children))

        return ''.join(text_parts)