docspan 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. docspan/__init__.py +3 -0
  2. docspan/__main__.py +0 -0
  3. docspan/backends/__init__.py +19 -0
  4. docspan/backends/base.py +85 -0
  5. docspan/backends/confluence/__init__.py +0 -0
  6. docspan/backends/confluence/adf/__init__.py +14 -0
  7. docspan/backends/confluence/adf/comparator.py +427 -0
  8. docspan/backends/confluence/adf/converter.py +119 -0
  9. docspan/backends/confluence/adf/converters.py +1449 -0
  10. docspan/backends/confluence/adf/interfaces.py +191 -0
  11. docspan/backends/confluence/adf/nodes.py +2085 -0
  12. docspan/backends/confluence/adf/parser.py +400 -0
  13. docspan/backends/confluence/adf/validators.py +161 -0
  14. docspan/backends/confluence/adf/visitors.py +495 -0
  15. docspan/backends/confluence/backend.py +227 -0
  16. docspan/backends/confluence/client.py +44 -0
  17. docspan/backends/confluence/config/__init__.py +21 -0
  18. docspan/backends/confluence/config/loader.py +107 -0
  19. docspan/backends/confluence/config/models.py +167 -0
  20. docspan/backends/confluence/config/validation.py +297 -0
  21. docspan/backends/confluence/markdown/__init__.py +22 -0
  22. docspan/backends/confluence/markdown/ast.py +819 -0
  23. docspan/backends/confluence/markdown/extensions/__init__.py +5 -0
  24. docspan/backends/confluence/markdown/extensions/frontmatter.py +80 -0
  25. docspan/backends/confluence/markdown/extensions/mermaid.py +64 -0
  26. docspan/backends/confluence/markdown/extensions/wikilinks.py +179 -0
  27. docspan/backends/confluence/markdown/inline_parser.py +495 -0
  28. docspan/backends/confluence/markdown/parser.py +1006 -0
  29. docspan/backends/confluence/models/__init__.py +18 -0
  30. docspan/backends/confluence/models/markdown_file.py +402 -0
  31. docspan/backends/confluence/models/page.py +212 -0
  32. docspan/backends/confluence/models/path_utils.py +34 -0
  33. docspan/backends/confluence/models/results.py +28 -0
  34. docspan/backends/confluence/models/sync_status.py +382 -0
  35. docspan/backends/confluence/services/__init__.py +0 -0
  36. docspan/backends/confluence/services/confluence/__init__.py +40 -0
  37. docspan/backends/confluence/services/confluence/attachment_client.py +147 -0
  38. docspan/backends/confluence/services/confluence/base_client.py +420 -0
  39. docspan/backends/confluence/services/confluence/client.py +376 -0
  40. docspan/backends/confluence/services/confluence/comment_client.py +682 -0
  41. docspan/backends/confluence/services/confluence/crawler.py +587 -0
  42. docspan/backends/confluence/services/confluence/label_client.py +130 -0
  43. docspan/backends/confluence/services/confluence/page_client.py +1288 -0
  44. docspan/backends/confluence/services/confluence/space_client.py +179 -0
  45. docspan/backends/confluence/services/confluence/url_parser.py +106 -0
  46. docspan/backends/google_docs/__init__.py +0 -0
  47. docspan/backends/google_docs/auth.py +143 -0
  48. docspan/backends/google_docs/backend.py +140 -0
  49. docspan/backends/google_docs/client.py +665 -0
  50. docspan/backends/google_docs/converter.py +471 -0
  51. docspan/backends/google_docs/docs_request_builder.py +232 -0
  52. docspan/backends/google_docs/docs_structure_parser.py +120 -0
  53. docspan/backends/google_docs/markdown_to_paragraph_parser.py +145 -0
  54. docspan/cli/__init__.py +0 -0
  55. docspan/cli/main.py +408 -0
  56. docspan/config.py +62 -0
  57. docspan/core/__init__.py +49 -0
  58. docspan/core/merge.py +30 -0
  59. docspan/core/orchestrator.py +332 -0
  60. docspan/core/paths.py +8 -0
  61. docspan/core/state.py +53 -0
  62. docspan-0.1.0.dist-info/METADATA +273 -0
  63. docspan-0.1.0.dist-info/RECORD +65 -0
  64. docspan-0.1.0.dist-info/WHEEL +4 -0
  65. docspan-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,495 @@
1
+ """
2
+ Visitors for converting Markdown nodes to ADF.
3
+
4
+ This module implements the visitor pattern for converting Markdown nodes
5
+ to Atlassian Document Format (ADF) nodes.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, List, Optional, cast
10
+
11
+ from docspan.backends.confluence.adf.interfaces import BaseNodeVisitor, NodeConverter, NodeRegistry
12
+ from docspan.backends.confluence.adf.nodes import AdfBuilder, AdfNode
13
+ from docspan.backends.confluence.markdown.ast import (
14
+ BlockquoteNode,
15
+ BulletListNode,
16
+ CodeBlockNode,
17
+ HeadingNode,
18
+ ImageNode,
19
+ InlineCodeNode,
20
+ LinkNode,
21
+ ListItemNode,
22
+ MarkdownNode,
23
+ MermaidNode,
24
+ OrderedListNode,
25
+ ParagraphNode,
26
+ TableNode,
27
+ TextNode,
28
+ WikiLinkNode,
29
+ )
30
+
31
+
32
class NodeVisitorRegistry(NodeRegistry):
    """
    In-memory lookup table from node-type name to converter.

    Attributes:
        converters: Mapping of node-type string to the converter registered for it
        logger: Module-level logger used to trace registrations
    """

    def __init__(self):
        """Create an empty registry."""
        self.logger = logging.getLogger(__name__)
        self.converters: Dict[str, NodeConverter] = {}

    def register(self, node_type: str, converter: NodeConverter) -> None:
        """Store ``converter`` under ``node_type``, replacing any previous entry."""
        self.converters[node_type] = converter
        self.logger.debug(f"Registered converter for node type: {node_type}")

    def get(self, node_type: str) -> Optional[NodeConverter]:
        """Return the converter registered for ``node_type``, or ``None`` if absent."""
        if node_type in self.converters:
            return self.converters[node_type]
        return None

    def has(self, node_type: str) -> bool:
        """Report whether any converter is registered for ``node_type``."""
        known_types = self.converters
        return node_type in known_types
58
+
59
+
60
class AdfNodeVisitor:
    """
    Visitor for converting Markdown nodes to ADF.

    Each node is dispatched on its ``type`` attribute to the converter the
    registry returns for that type.

    Attributes:
        registry: Registry of node converters
        builder: ADF node builder (stored for collaborators; not used by the
            methods of this class directly)
        logger: Logger instance
    """

    def __init__(self, registry: NodeRegistry, builder: AdfBuilder):
        """
        Initialize the visitor.

        Args:
            registry: Registry of node converters
            builder: ADF node builder
        """
        self.registry = registry
        self.builder = builder
        self.logger = logging.getLogger(__name__)

    def visit(self, node: MarkdownNode) -> AdfNode:
        """
        Visit a markdown node and convert it to an ADF node.

        Args:
            node: Markdown node to convert

        Returns:
            Whatever the registered converter's ``convert`` returns for the node

        Raises:
            ValueError: If no converter is found for the node type
        """
        converter = self.registry.get(node.type)
        # Compare against None explicitly: a registered converter must be used
        # even if the object happens to be falsy (e.g. it defines __len__).
        if converter is not None:
            return converter.convert(node)

        self.logger.error(f"No converter found for node type: {node.type}")
        raise ValueError(f"Unknown node type: {node.type}")

    def visit_children(self, node: MarkdownNode) -> List[AdfNode]:
        """
        Visit and convert all child nodes.

        Args:
            node: Parent node

        Returns:
            List of converted child nodes, in order; empty when the node has
            no children (including ``children`` being ``None``)

        Raises:
            ValueError: If any child has a type with no registered converter
        """
        # Treat a missing child list the same as an empty one.
        return [self.visit(child) for child in (node.children or [])]
116
+
117
+
118
class TextNodeVisitor(BaseNodeVisitor):
    """Turns plain text nodes into ADF text nodes."""

    node_type = "text"

    def __init__(self, builder: AdfBuilder):
        """Keep a reference to the ADF builder."""
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """
        Build an ADF text node from ``node``.

        A node with falsy content yields an empty text node without marks;
        otherwise the content is emitted with a copy of the node's marks.
        """
        source = cast(TextNode, node)

        if source.content:
            # Copy the marks so the builder receives its own list.
            return self.builder.text(source.content, list(source.marks))

        return self.builder.text("")
139
+
140
+
141
class HeadingNodeVisitor(BaseNodeVisitor):
    """Turns heading nodes into ADF headings."""

    node_type = "heading"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the heading's children, then wrap them at the heading's level."""
        heading = cast(HeadingNode, node)
        converted = self.parent_visitor.visit_children(heading)
        level = heading.level
        return self.builder.heading(converted, level)
156
+
157
+
158
class ParagraphNodeVisitor(BaseNodeVisitor):
    """Turns paragraph nodes into ADF paragraphs."""

    node_type = "paragraph"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the paragraph's children and wrap them in an ADF paragraph."""
        paragraph = cast(ParagraphNode, node)
        converted = self.parent_visitor.visit_children(paragraph)
        return self.builder.paragraph(converted)
173
+
174
+
175
class ListItemNodeVisitor(BaseNodeVisitor):
    """Turns list item nodes into ADF list items."""

    node_type = "listItem"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the item's children and wrap them in an ADF list item."""
        item = cast(ListItemNode, node)
        converted = self.parent_visitor.visit_children(item)
        return self.builder.list_item(converted)
190
+
191
+
192
class BulletListNodeVisitor(BaseNodeVisitor):
    """Turns bullet list nodes into ADF bullet lists."""

    node_type = "bulletList"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the list's children and wrap them in an ADF bullet list."""
        bullets = cast(BulletListNode, node)
        converted = self.parent_visitor.visit_children(bullets)
        return self.builder.bullet_list(converted)
207
+
208
+
209
class OrderedListNodeVisitor(BaseNodeVisitor):
    """Turns ordered list nodes into ADF ordered lists."""

    node_type = "orderedList"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the list's children and wrap them in an ADF ordered list."""
        numbered = cast(OrderedListNode, node)
        converted = self.parent_visitor.visit_children(numbered)
        return self.builder.ordered_list(converted)
224
+
225
+
226
class CodeBlockNodeVisitor(BaseNodeVisitor):
    """Turns fenced/indented code block nodes into ADF code blocks."""

    node_type = "codeBlock"

    def __init__(self, builder: AdfBuilder):
        """Keep a reference to the ADF builder."""
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Emit an ADF code block; falsy content is replaced by an empty string."""
        block = cast(CodeBlockNode, node)
        body = block.content
        if not body:
            body = ""
        return self.builder.code_block(body, block.language)
239
+
240
+
241
class InlineCodeNodeVisitor(BaseNodeVisitor):
    """Turns inline code spans into ADF text nodes carrying a ``code`` mark."""

    node_type = "inlineCode"

    def __init__(self):
        """Nothing to set up; this visitor constructs ``AdfNode`` directly."""

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Emit a ``text`` node with a single ``code`` mark; falsy content becomes ""."""
        span = cast(InlineCodeNode, node)
        literal = span.content
        if not literal:
            literal = ""
        code_mark = {"type": "code"}
        return AdfNode(type="text", text=literal, marks=[code_mark])
254
+
255
+
256
class BlockquoteNodeVisitor(BaseNodeVisitor):
    """Turns blockquote nodes into ADF blockquotes."""

    node_type = "blockquote"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Keep the builder and the visitor used to convert child nodes."""
        self.parent_visitor = parent_visitor
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Convert the quote's children and wrap them in an ADF blockquote."""
        quote = cast(BlockquoteNode, node)
        converted = self.parent_visitor.visit_children(quote)
        return self.builder.blockquote(converted)
271
+
272
+
273
class TableNodeVisitor(BaseNodeVisitor):
    """Visitor for table nodes."""

    node_type = "table"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Initialize the visitor."""
        self.builder = builder
        self.parent_visitor = parent_visitor

    def visit(self, node: MarkdownNode) -> AdfNode:
        """
        Convert a table node to ADF.

        The header row (when ``headers`` is non-empty) is emitted first,
        followed by every data row. The resulting list of rows is handed to
        ``builder.table`` together with a flag saying whether the first row
        is a header row.
        """
        table_node = cast(TableNode, node)
        has_headers = bool(table_node.headers)

        adf_rows = []
        if has_headers:
            # Headers go through the same conversion as data cells so that a
            # header given as a parsed node is visited, not treated as text.
            adf_rows.append([self._convert_cell(header) for header in table_node.headers])

        adf_rows.extend(
            [self._convert_cell(cell) for cell in row]
            for row in table_node.rows
        )

        return self.builder.table(adf_rows, has_headers)

    def _convert_cell(self, cell) -> AdfNode:
        """Convert one header/data cell: visit nodes, wrap raw values in a paragraph."""
        if isinstance(cell, MarkdownNode):
            return self.parent_visitor.visit(cell)

        # Raw content: render its string form as a single-text paragraph.
        text = cell if isinstance(cell, str) else str(cell)
        return self.builder.paragraph([self.builder.text(text)])
313
+
314
+
315
class LinkNodeVisitor(BaseNodeVisitor):
    """Visitor for link nodes."""

    node_type = "link"

    def __init__(self, builder: AdfBuilder, parent_visitor: AdfNodeVisitor):
        """Initialize the visitor."""
        self.builder = builder
        self.parent_visitor = parent_visitor

    def visit(self, node: MarkdownNode) -> AdfNode:
        """
        Convert a link node to ADF.

        The link text is the space-joined ``content`` of the child nodes. If
        no child contributes any text, the link node's own ``content`` is
        used, and finally an empty string.
        """
        link_node = cast(LinkNode, node)

        # Keep only children that actually carry text. Joining empty strings
        # with " " would yield a whitespace-only (truthy) result and wrongly
        # suppress the fallback to link_node.content below.
        fragments = [
            child.content
            for child in link_node.children
            if getattr(child, "content", None)
        ]
        text_content = " ".join(fragments) or link_node.content or ""

        return self.builder.link(text_content, link_node.url, link_node.title)
336
+
337
+
338
class ImageNodeVisitor(BaseNodeVisitor):
    """Turns image nodes into ADF image nodes."""

    node_type = "image"

    def __init__(self, builder: AdfBuilder):
        """Keep a reference to the ADF builder."""
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Pass the image's source, alt text and title to the builder."""
        picture = cast(ImageNode, node)
        source, alt, title = picture.src, picture.alt, picture.title
        return self.builder.image(source, alt, title)
351
+
352
+
353
class WikiLinkNodeVisitor(BaseNodeVisitor):
    """Turns wiki-style links into ADF links pointing at ``#<target>``."""

    node_type = "wikiLink"

    def __init__(self, builder: AdfBuilder):
        """Keep a reference to the ADF builder."""
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Emit a link labelled with the display text, or the target when none is set."""
        wiki = cast(WikiLinkNode, node)

        label = wiki.display
        if not label:
            label = wiki.target

        href = f"#{wiki.target}"
        return self.builder.link(label, href)
367
+
368
+
369
class HorizontalRuleNodeVisitor(BaseNodeVisitor):
    """Turns horizontal rule nodes into ADF rule nodes."""

    node_type = "horizontalRule"

    def __init__(self, builder: AdfBuilder):
        """Keep a reference to the ADF builder."""
        self.builder = builder

    def visit(self, node: MarkdownNode) -> AdfNode:
        """Emit an ADF horizontal rule; the incoming node carries no data that is used."""
        rule = self.builder.horizontal_rule()
        return rule
381
+
382
+
383
class MermaidNodeVisitor(BaseNodeVisitor):
    """
    Visitor for mermaid diagram nodes.

    The output depends on which keys are present in the node's ``attrs``,
    checked in this order:

    1. ``rendered_url``: a paragraph carrying ``iframe_html`` (if present),
       otherwise an image node (optionally rewritten to reference
       ``attachment_id``).
    2. ``embed_html``: a paragraph linking to ``live_link``.
    3. ``live_link``: a link to the live diagram.
    4. Otherwise: a ``mermaid`` code block containing the diagram source.
    """

    node_type = "mermaid"

    # Leading diagram keyword(s) -> alt text used for the rendered image.
    _ALT_TEXT_BY_PREFIX = (
        (("flowchart", "graph"), "Flowchart diagram"),
        (("sequenceDiagram",), "Sequence diagram"),
        (("classDiagram",), "Class diagram"),
    )

    def __init__(self, builder: AdfBuilder):
        """Initialize the visitor."""
        self.builder = builder
        self.logger = logging.getLogger(__name__)

    def visit(self, node: MarkdownNode) -> AdfNode:
        """
        Convert a mermaid diagram node to ADF.

        Args:
            node: Mermaid node to convert

        Returns:
            The ADF node chosen according to the class-level dispatch order;
            a ``mermaid`` code block when nothing better is available.
        """
        mermaid_node = cast(MermaidNode, node)
        attrs = mermaid_node.attrs
        # Normalise once: tolerate a missing source and avoid re-stripping.
        source = mermaid_node.code or ""
        stripped = source.strip()

        self.logger.info("Processing Mermaid diagram for ADF conversion")
        self.logger.debug(f"Mermaid node attributes: {attrs}")
        self.logger.debug(f"Mermaid code type: {stripped.split()[0] if stripped else 'empty'}")

        if "rendered_url" in attrs:
            return self._visit_rendered(attrs, source, stripped)

        if "embed_html" in attrs:
            self.logger.info("Found embed HTML for Mermaid diagram")

            # The embed HTML itself is not emitted; a link to the live diagram
            # is used instead, so a live link is required for this branch.
            live_link = attrs.get("live_link")
            if live_link is None:
                self.logger.warning(
                    "Mermaid node has 'embed_html' but no 'live_link', using code block fallback"
                )
                return self.builder.code_block(source, "mermaid")

            self.logger.info(f"Using link to Mermaid Live diagram: {live_link}")
            return self.builder.paragraph([
                self.builder.text("This diagram is also available at: "),
                self.builder.link("Mermaid Live View", live_link, "Click to view the diagram in Mermaid Live Editor")
            ])

        if "live_link" in attrs:
            live_link = attrs["live_link"]
            self.logger.info(f"Found Mermaid Live link: {live_link}")

            # NOTE(review): this returns the link node itself, unlike the
            # embed_html branch which wraps its link in a paragraph; confirm
            # callers accept a bare link here.
            return self.builder.link("View Mermaid diagram", live_link, "Mermaid Live diagram")

        # Nothing usable was attached to the node: keep the raw source.
        self.logger.warning("Mermaid diagram could not be rendered as image or embedded, using code block fallback")
        self.logger.warning("No 'rendered_url', 'embed_html', or 'live_link' attribute found in the mermaid node")
        self.logger.debug(f"Available attributes: {list(attrs.keys())}")

        return self.builder.code_block(source, "mermaid")

    def _alt_text_for(self, stripped_code: str) -> str:
        """Pick an alt text from the diagram's leading keyword."""
        for prefixes, alt_text in self._ALT_TEXT_BY_PREFIX:
            if stripped_code.startswith(prefixes):
                return alt_text
        return "Mermaid diagram"

    def _visit_rendered(self, attrs, source: str, stripped_code: str) -> AdfNode:
        """Build the node for a diagram that has a ``rendered_url`` attribute."""
        image_url = attrs["rendered_url"]
        self.logger.info(f"Found rendered URL for Mermaid diagram: {image_url}")

        alt_text = self._alt_text_for(stripped_code)
        self.logger.info(f"Converting Mermaid diagram to ADF image with alt text: {alt_text}")

        if "iframe_html" in attrs:
            self.logger.info("Using iframe HTML embedding for diagram")

            # Empty paragraph used as a carrier: the HTML is attached as an
            # extra attribute, intended to be picked up during storage format
            # conversion (that step is outside this class).
            note_paragraph = self.builder.paragraph([
                self.builder.text("")
            ])
            note_paragraph.storage_format_html = attrs["iframe_html"]
            return note_paragraph

        self.logger.debug(f"Creating ADF image node with URL: {image_url}")
        try:
            image_node = self.builder.image(image_url, alt_text)
            self.logger.debug(f"Successfully created ADF image node: {image_node.to_dict()}")

            if "attachment_id" in attrs:
                attachment_id = attrs["attachment_id"]
                self.logger.info(f"Adding attachment ID {attachment_id} to image node")

                # Replace the URL-based attributes with a file-based reference
                # to the attachment; alt_text is always non-empty here.
                image_node.attrs = {
                    "type": "file",
                    "id": attachment_id,
                    "collection": "contentId",
                    "alt": alt_text,
                }

                self.logger.debug(f"Updated image node with attachment ID: {image_node.to_dict()}")

            return image_node
        except Exception as e:
            # Deliberate best-effort: any failure building the image degrades
            # to showing the diagram source instead of aborting conversion.
            self.logger.error(f"Error creating image node for mermaid diagram: {e}")
            return self.builder.code_block(source, "mermaid")