pull-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pull_cli/macros.py ADDED
@@ -0,0 +1,527 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import re
5
+ from dataclasses import dataclass, field
6
+
7
+ from bs4 import BeautifulSoup, Tag
8
+
9
+ from .markdown_writer import rendered_html_to_markdown
10
+ from .models import AttachmentRecord, MacroRecord, PullOptions, WarningRecord
11
+
12
+
13
@dataclass
class MacroInstance:
    """A single macro element pulled out of a storage-format fragment."""

    # Short hexadecimal identifier assigned when the macro is parsed.
    macro_id: str
    # Macro name as used for adapter lookup.
    name: str
    # Parameter name -> text value.
    params: dict[str, str]
    # Inner markup of the macro body; empty string when there is none.
    body: str
    # Serialized markup of the whole macro element.
    raw: str
20
+
21
+
22
@dataclass
class MacroContext:
    """Per-page information handed to every adapter during conversion."""

    # Identifier of the page whose macros are being converted.
    page_id: str
    # Attachment records available for the page.
    attachments: list[AttachmentRecord]
    # Pull options consulted by adapters (e.g. include following, unknown-macro policy).
    options: PullOptions
    # (title, link) pairs for child pages; each instance gets its own list.
    child_links: list[tuple[str, str]] = field(default_factory=list)
28
+
29
+
30
class MacroAdapter:
    """Base adapter: subclasses declare the macro names they handle and override ``convert``."""

    # Macro names this adapter is registered for (none for the base class).
    names: set[str] = set()
    # Label recorded on the produced records.
    adapter_name = "unknown"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Convert *macro* using the unknown-macro fallback."""
        record = unknown_macro(macro, context)
        return record
36
+
37
+
38
class PanelAdapter(MacroAdapter):
    """Render info/note/tip/warning/panel macros as a Markdown callout block quote."""

    names = {"info", "note", "tip", "warning", "panel"}
    adapter_name = "panel"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record whose Markdown is a ``> [!LABEL] title`` header plus the quoted body."""
        # Upper-casing also maps "panel" to "PANEL", so no special case is needed.
        label = macro.name.upper()
        params = macro.params
        title = params.get("title") or params.get("name") or label.title()

        quoted_lines = []
        for line in storage_fragment_to_markdown(macro.body).splitlines():
            # Blank body lines become a bare ">" so the quote stays contiguous.
            quoted_lines.append(f"> {line}" if line else ">")
        quoted = "\n".join(quoted_lines)

        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=f"> [!{label}] {title}\n{quoted}".strip(),
            params=params,
        )
56
+
57
+
58
class CodeAdapter(MacroAdapter):
    """Render code/noformat macros as fenced Markdown code blocks."""

    names = {"code", "noformat"}
    adapter_name = "code"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record whose Markdown is the macro body inside a code fence.

        The fence language comes from the ``language`` or ``lang`` parameter,
        reduced to characters that are safe in a fence info string.
        """
        language = macro.params.get("language") or macro.params.get("lang") or ""
        code = plain_text(macro.body).strip("\n")
        fence_language = re.sub(r"[^A-Za-z0-9_+.-]", "", language)
        # A fence must be longer than any backtick run inside the code,
        # otherwise a line of backticks in the body would close the block early.
        longest_run = max((len(run) for run in re.findall(r"`+", code)), default=0)
        fence = "`" * max(3, longest_run + 1)
        markdown = f"{fence}{fence_language}\n{code}\n{fence}"
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=markdown,
            params=macro.params,
        )
75
+
76
+
77
class StatusAdapter(MacroAdapter):
    """Render status lozenges as an inline ``[STATUS: text / colour]`` marker."""

    names = {"status"}
    adapter_name = "status"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record describing the status text and its colour.

        Text comes from the ``title`` or ``text`` parameter, falling back to
        the macro body. Colour comes from ``colour`` or ``color`` and defaults
        to ``"default"``.
        """
        params = macro.params
        text = params.get("title") or params.get("text") or plain_text(macro.body).strip()
        # ``subtle`` is deliberately not consulted: in the Confluence status macro
        # it is a true/false styling flag, so using it as a colour would emit
        # markers such as "[STATUS: Done / true]".
        color = params.get("colour") or params.get("color") or "default"
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=f"[STATUS: {text} / {color}]",
            params=params,
        )
92
+
93
+
94
class ExpandAdapter(MacroAdapter):
    """Render expand macros as a level-3 heading followed by the body."""

    names = {"expand"}
    adapter_name = "expand"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record with an ``### Expand: title`` section for *macro*."""
        params = macro.params
        heading = params.get("title") or params.get("name") or "Expand"
        content = storage_fragment_to_markdown(macro.body)
        section = f"### Expand: {heading}\n\n{content}"
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            # strip() drops the trailing blank lines left when the body is empty.
            markdown=section.strip(),
            params=params,
        )
110
+
111
+
112
class TabsAdapter(MacroAdapter):
    """Render tab containers as one ``### Tab: title`` section per tab."""

    names = {"tabs", "tab-group", "tabgroup", "composition-setup"}
    adapter_name = "tabs"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record with one Markdown section per nested tab macro.

        Nested macros that are not tabs are skipped. When no tab is found the
        whole body is rendered under a single ``### Tabs`` heading.
        """
        tab_names = {"tab", "tab-pane", "aui-tab", "composition-tab"}
        tab_blocks: list[str] = []
        for tab in parse_macros(macro.body, top_level_only=True):
            if tab.name not in tab_names:
                continue
            # Number untitled tabs by their position among the tabs actually
            # rendered, so skipped non-tab macros do not leave gaps.
            fallback_title = f"Tab {len(tab_blocks) + 1}"
            title = tab.params.get("title") or tab.params.get("name") or fallback_title
            body = storage_fragment_to_markdown(tab.body)
            tab_blocks.append(f"### Tab: {title}\n\n{body}".strip())
        if not tab_blocks:
            body = storage_fragment_to_markdown(macro.body)
            tab_blocks.append(f"### Tabs\n\n{body}".strip())
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown="\n\n".join(tab_blocks),
            params=macro.params,
        )
136
+
137
+
138
class FlattenAdapter(MacroAdapter):
    """Drop layout wrappers (section/column/layout) and keep only their content."""

    names = {"section", "column", "layout", "layout-section", "layout-cell"}
    adapter_name = "layout"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record whose Markdown is just the converted macro body."""
        flattened = storage_fragment_to_markdown(macro.body)
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=flattened,
            params=macro.params,
        )
151
+
152
+
153
class TocAdapter(MacroAdapter):
    """Replace table-of-contents macros with a fixed placeholder note."""

    names = {"toc"}
    adapter_name = "toc"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a ``placeholder`` record; no table of contents is generated."""
        note = "[Table of contents macro omitted: headings are present in the page Markdown.]"
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            status="placeholder",
            markdown=note,
            params=macro.params,
        )
167
+
168
+
169
class ChildrenAdapter(MacroAdapter):
    """Render children/page-tree macros as a bullet list of child page links."""

    names = {"children", "pagetree", "page-tree"}
    adapter_name = "children"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record listing ``context.child_links``, or a note when there are none."""
        links = context.child_links
        if not links:
            markdown = "[Children/page tree macro: no in-scope child pages were available in this pull.]"
        else:
            bullets = [f"- [{title}]({link})" for title, link in links]
            markdown = "### Child Pages\n\n" + "\n".join(bullets)
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=markdown,
            params=macro.params,
        )
187
+
188
+
189
class IncludeAdapter(MacroAdapter):
    """Represent include/excerpt macros as inline content or a dependency placeholder."""

    names = {"include", "excerpt-include", "multi-excerpt-include", "excerpt"}
    adapter_name = "include"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record for an include/excerpt macro; a partial-macro warning is always attached.

        With ``follow_includes`` set, the macro's own inline body is rendered
        (status ``converted``); otherwise a placeholder naming the target is
        emitted (status ``placeholder``).
        """
        params = macro.params
        target = params.get("page") or params.get("name") or params.get("default-parameter-value") or "unknown"

        if context.options.follow_includes:
            inline = storage_fragment_to_markdown(macro.body)
            if inline:
                markdown = inline
            else:
                markdown = f"[Include/excerpt target {target!r} requested for follow, but no inline body was available.]"
            message = "Include/excerpt follow was requested but only inline source content was available."
            status = "converted"
        else:
            markdown = f"[Include/excerpt dependency not followed: {target}]"
            message = "Include/excerpt macro was represented as a dependency placeholder."
            status = "placeholder"

        # Both branches report the same warning code; only the message differs.
        warnings: list[WarningRecord] = [
            WarningRecord(
                code="W_MACRO_PARTIAL",
                message=message,
                source_page_id=context.page_id,
                details={"target": target},
            )
        ]
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            status=status,
            markdown=markdown,
            params=params,
            warnings=warnings,
        )
227
+
228
+
229
class AttachmentsAdapter(MacroAdapter):
    """Render the attachments macro as a bullet list of attachment filenames."""

    names = {"attachments"}
    adapter_name = "attachments"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record listing ``context.attachments``, or a note when the list is empty."""
        attachments = context.attachments
        if not attachments:
            markdown = "[Attachments macro: no attachments were returned by Confluence.]"
        else:
            bullets = [f"- {item.filename}" for item in attachments]
            markdown = "### Attachments\n\n" + "\n".join(bullets)
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=markdown,
            params=macro.params,
        )
247
+
248
+
249
class ViewFileAdapter(MacroAdapter):
    """Represent file-viewer macros as a short note naming the displayed file."""

    names = {"view-file", "office-excel", "office-powerpoint", "office-word", "viewpdf", "pdf"}
    adapter_name = "view-file"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record naming the file shown by *macro*."""
        filename = macro.params.get("name") or macro.params.get("filename")
        if not filename:
            # Only inspect the raw markup when no parameter names the file.
            filename = _attachment_name(macro.raw) or "file"
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=f"[Displayed file attachment: {filename}]",
            params=macro.params,
        )
263
+
264
+
265
class JiraAdapter(MacroAdapter):
    """Represent Jira macros as a placeholder carrying the query, key or URL."""

    names = {"jira", "jiraissues", "jiraportlet"}
    adapter_name = "jira"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a ``placeholder`` record describing what the Jira macro pointed at."""
        params = macro.params
        # First non-empty parameter wins, in this priority order.
        query = next(
            (params[key] for key in ("jqlQuery", "key", "url") if params.get(key)),
            "unavailable",
        )
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            status="placeholder",
            markdown=f"[Jira macro snapshot placeholder: {query}]",
            params=params,
        )
280
+
281
+
282
class DiagramAdapter(MacroAdapter):
    """Represent diagram macros as a snapshot note plus any inline text source."""

    names = {
        "gliffy",
        "drawio",
        "draw.io",
        "mermaid",
        "plantuml",
        "plantumlrender",
        "diagram",
    }
    adapter_name = "diagram"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record for a diagram macro.

        The Markdown always contains a snapshot note. When the macro body has
        text, it is appended as a ``text`` code block. When diagram sources
        were requested but the body is empty, a
        ``W_ASSET_DIAGRAM_SOURCE_NOT_FOUND`` warning is attached.
        """
        title = macro.params.get("name") or macro.params.get("title") or macro.name
        body = plain_text(macro.body).strip()
        markdown = f"[Diagram macro snapshot: {title}]"
        if body:
            # Use a fence longer than any backtick run in the source so the
            # diagram text cannot terminate the code block early.
            longest_run = max((len(run) for run in re.findall(r"`+", body)), default=0)
            fence = "`" * max(3, longest_run + 1)
            markdown += f"\n\n{fence}text\n{body}\n{fence}"
        warnings: list[WarningRecord] = []
        if context.options.diagram_sources and not body:
            warnings.append(
                WarningRecord(
                    code="W_ASSET_DIAGRAM_SOURCE_NOT_FOUND",
                    message="Diagram source was requested but was not discoverable in storage.",
                    source_page_id=context.page_id,
                    details={"macro": macro.name, "title": title},
                )
            )
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=markdown,
            params=macro.params,
            warnings=warnings,
        )
319
+
320
+
321
class DynamicAdapter(MacroAdapter):
    """Represent dynamic macros as a placeholder with a snapshot warning."""

    names = {"recently-updated", "contentbylabel", "content-by-label", "task-report", "roadmap"}
    adapter_name = "dynamic"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a ``placeholder`` record carrying a ``W_DYNAMIC_MACRO_SNAPSHOT`` warning."""
        page_id = context.page_id
        macro_name = macro.name
        snapshot_warning = WarningRecord(
            code="W_DYNAMIC_MACRO_SNAPSHOT",
            message="Dynamic macro is represented as a pull-time rendered snapshot where available.",
            source_page_id=page_id,
            details={"macro": macro_name},
        )
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro_name,
            adapter=self.adapter_name,
            source_page_id=page_id,
            status="placeholder",
            markdown=f"[Dynamic macro snapshot: {macro_name}]",
            params=macro.params,
            warnings=[snapshot_warning],
        )
342
+
343
+
344
class HtmlAdapter(MacroAdapter):
    """Convert HTML macros to Markdown after removing executable/embedded elements."""

    names = {"html"}
    adapter_name = "html"

    def convert(self, macro: MacroInstance, context: MacroContext) -> MacroRecord:
        """Return a record with the sanitized HTML body rendered as Markdown.

        ``script``, ``iframe``, ``object`` and ``embed`` elements are removed
        before conversion; when any were removed a ``W_SANITIZED_HTML``
        warning is attached.
        """
        soup = BeautifulSoup(macro.body, "lxml")
        blocked = soup.find_all(["script", "iframe", "object", "embed"])
        for element in blocked:
            element.decompose()

        rendered = rendered_html_to_markdown(str(soup)).strip()
        markdown = rendered if rendered else "[HTML macro had no visible text.]"

        warnings = []
        if blocked:
            warnings.append(
                WarningRecord(
                    code="W_SANITIZED_HTML",
                    message="Executable content was stripped from an HTML macro.",
                    source_page_id=context.page_id,
                )
            )
        return MacroRecord(
            macro_id=macro.macro_id,
            name=macro.name,
            adapter=self.adapter_name,
            source_page_id=context.page_id,
            markdown=markdown,
            params=macro.params,
            warnings=warnings,
        )
373
+
374
+
375
class MacroRegistry:
    """Maps macro names to adapter instances and converts a page's top-level macros."""

    def __init__(self) -> None:
        """Instantiate every built-in adapter and index it under each name it declares."""
        self._adapters = {}
        builtin: list[MacroAdapter] = [
            PanelAdapter(),
            CodeAdapter(),
            StatusAdapter(),
            ExpandAdapter(),
            TabsAdapter(),
            FlattenAdapter(),
            TocAdapter(),
            ChildrenAdapter(),
            IncludeAdapter(),
            AttachmentsAdapter(),
            ViewFileAdapter(),
            JiraAdapter(),
            DiagramAdapter(),
            DynamicAdapter(),
            HtmlAdapter(),
        ]
        for adapter in builtin:
            for macro_name in adapter.names:
                self._adapters[macro_name] = adapter

    def convert_all(self, storage: str | None, context: MacroContext) -> list[MacroRecord]:
        """Convert every top-level macro in *storage* and return the records in document order.

        Macros without a registered adapter either get an ``ignored`` record
        (when ``context.options.unknown_macro`` is ``"ignore"``) or go through
        ``unknown_macro``.
        """
        converted: list[MacroRecord] = []
        for macro in parse_macros(storage or "", top_level_only=True):
            handler = self._adapters.get(macro.name)
            if handler:
                converted.append(handler.convert(macro, context))
                continue
            if context.options.unknown_macro != "ignore":
                converted.append(unknown_macro(macro, context))
                continue
            ignored = MacroRecord(
                macro_id=macro.macro_id,
                name=macro.name,
                adapter="unknown",
                source_page_id=context.page_id,
                status="ignored",
                params=macro.params,
            )
            converted.append(ignored)
        return converted
416
+
417
+
418
def parse_macros(storage: str, *, top_level_only: bool = False) -> list[MacroInstance]:
    """Extract macro elements from a storage-format fragment.

    Args:
        storage: Storage-format markup; an empty value yields an empty list.
        top_level_only: When true, macros nested inside another macro are skipped.

    Returns:
        One ``MacroInstance`` per macro element, in document order.
    """
    if not storage:
        return []

    def is_nested(candidate: Tag) -> bool:
        # True when any ancestor element is itself a macro.
        for ancestor in candidate.parents:
            if isinstance(ancestor, Tag) and _is_macro_tag(ancestor):
                return True
        return False

    macro_tags = [node for node in _storage_soup(storage).find_all(True) if _is_macro_tag(node)]
    if top_level_only:
        macro_tags = [node for node in macro_tags if not is_nested(node)]

    found = []
    for position, node in enumerate(macro_tags, start=1):
        declared_name = _attr(node, "ac:name", "name") or "unknown"
        markup = str(node)
        # The id is derived from the declared name, the 1-based position and a markup prefix.
        digest = hashlib.sha1(f"{declared_name}:{position}:{markup[:100]}".encode()).hexdigest()
        found.append(
            MacroInstance(
                macro_id=digest[:12],
                name=declared_name.lower(),
                params=_parameters(node),
                body=_body(node),
                raw=markup,
            )
        )
    return found
440
+
441
+
442
def storage_fragment_to_markdown(fragment: str) -> str:
    """Convert a storage-format fragment to Markdown, replacing macros with short notes.

    Every macro element is swapped for a ``[Nested macro: name]`` text marker
    before the remaining markup is converted. An empty fragment yields ``""``.
    """
    if not fragment:
        return ""
    soup = _storage_soup(fragment)
    nested_macros = [node for node in soup.find_all(True) if _is_macro_tag(node)]
    for node in nested_macros:
        macro_name = _attr(node, 'ac:name', 'name') or 'unknown'
        node.replace_with(f"[Nested macro: {macro_name}]")
    # Serialize only the children of the wrapper root, not the wrapper itself.
    inner_html = "".join(str(child) for child in _storage_root(soup).contents)
    return rendered_html_to_markdown(inner_html).strip()
452
+
453
+
454
def plain_text(fragment: str) -> str:
    """Return the text content of a storage fragment, with text nodes joined by newlines."""
    if fragment:
        root = _storage_root(_storage_soup(fragment))
        return root.get_text("\n")
    return ""
458
+
459
+
460
def unknown_macro(macro: MacroInstance, context: MacroContext) -> MacroRecord:
    """Build the fallback record for a macro that has no adapter.

    The record carries a ``W_MACRO_UNKNOWN`` warning. Its status is ``"error"``
    when ``context.options.unknown_macro`` is ``"error"`` and ``"placeholder"``
    otherwise.
    """
    if context.options.unknown_macro == "error":
        status = "error"
    else:
        status = "placeholder"
    unsupported = WarningRecord(
        code="W_MACRO_UNKNOWN",
        message=f"Unsupported macro {macro.name!r} was represented as a placeholder.",
        source_page_id=context.page_id,
        details={"macro": macro.name, "params": macro.params},
    )
    placeholder = f"[Unsupported Confluence macro: {macro.name}; params={macro.params}]"
    return MacroRecord(
        macro_id=macro.macro_id,
        name=macro.name,
        adapter="unknown",
        source_page_id=context.page_id,
        status=status,
        markdown=placeholder,
        params=macro.params,
        warnings=[unsupported],
    )
478
+
479
+
480
def _is_macro_tag(tag: Tag) -> bool:
    """Return True when the element name ends in ``structured-macro`` or ``macro``."""
    macro_suffixes = ("structured-macro", "macro")
    return tag.name.endswith(macro_suffixes)
482
+
483
+
484
def _attr(tag: Tag, *names: str) -> str | None:
    """Return the first attribute among *names* whose value is a string, else ``None``."""
    # Lazy lookup: attributes are read in the order given, stopping at the first string value.
    values = (tag.attrs.get(candidate) for candidate in names)
    return next((value for value in values if isinstance(value, str)), None)
490
+
491
+
492
def _parameters(tag: Tag) -> dict[str, str]:
    """Collect the named parameters that belong to the macro element *tag*.

    Only direct child elements are inspected. Parameters of macros nested in
    the body are not this macro's parameters, and searching all descendants
    would let them leak into (or overwrite) the outer macro's values.

    Returns:
        Mapping of parameter name to its whitespace-normalized text. Elements
        without a string name attribute are skipped. When a name repeats, the
        last occurrence wins.
    """
    params: dict[str, str] = {}
    for parameter in tag.find_all(True, recursive=False):
        if not parameter.name.endswith("parameter"):
            continue
        name = _attr(parameter, "ac:name", "name")
        if not name:
            continue
        params[name] = parameter.get_text(" ", strip=True)
    return params
502
+
503
+
504
def _body(tag: Tag) -> str:
    """Return the serialized inner content of the macro's body element, or ``""``.

    A rich-text or plain-text body that is a direct child is preferred; failing
    that, the first such element anywhere below *tag* is used.
    """
    body_suffixes = ("rich-text-body", "plain-text-body")

    def inner_markup(element: Tag) -> str:
        return "".join(str(part) for part in element.contents)

    direct = next(
        (child for child in tag.find_all(True, recursive=False) if child.name.endswith(body_suffixes)),
        None,
    )
    if direct is not None:
        return inner_markup(direct)

    # Fallback: no direct body child, so take the first body element among all descendants.
    nested = next(
        (child for child in tag.find_all(True) if child.name.endswith(body_suffixes)),
        None,
    )
    return inner_markup(nested) if nested is not None else ""
512
+
513
+
514
def _attachment_name(raw: str) -> str | None:
    """Return the filename of the first attachment element in *raw*, or ``None``."""
    root = _storage_root(_storage_soup(raw))
    for element in root.find_all(True):
        if not element.name.endswith("attachment"):
            continue
        # Only the first attachment element is considered, even if it has no filename.
        return _attr(element, "ri:filename", "filename")
    return None
520
+
521
+
522
def _storage_soup(fragment: str) -> BeautifulSoup:
    """Parse *fragment* with the ``xml`` parser after wrapping it in a ``<pull-root>`` element."""
    # The wrapper gives the fragment a single root element.
    wrapped = f"<pull-root>{fragment}</pull-root>"
    return BeautifulSoup(wrapped, "xml")
524
+
525
+
526
def _storage_root(soup: BeautifulSoup) -> Tag | BeautifulSoup:
    """Return the ``pull-root`` wrapper element, else ``soup.body``, else the soup itself."""
    wrapper = soup.find("pull-root")
    if wrapper:
        return wrapper
    return soup.body or soup
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from markdownify import markdownify as html_to_markdown
4
+
5
+
6
def rendered_html_to_markdown(html: str) -> str:
    """Convert rendered HTML to Markdown and tidy the whitespace.

    Trailing whitespace is removed from every line and runs of blank lines are
    capped at two. A result with at least one line is stripped and terminated
    with a single newline; otherwise the empty string is returned.
    """
    converted = html_to_markdown(
        html or "",
        heading_style="ATX",
        bullets="-",
        strip=["script", "style"],
    )
    kept: list[str] = []
    blank_run = 0
    for raw_line in converted.splitlines():
        line = raw_line.rstrip()
        # Track consecutive blank lines; any non-blank line resets the run.
        blank_run = 0 if line else blank_run + 1
        if blank_run <= 2:
            kept.append(line)
    if not kept:
        return ""
    return "\n".join(kept).strip() + "\n"