md2confluence-1.1.1-py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
md2confluence.py ADDED
@@ -0,0 +1,1768 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ md2confluence - Markdown to Confluence Sync Tool
4
+
5
+ Converts markdown documentation to Confluence storage format and syncs via API.
6
+
7
+ Features:
8
+ - Markdown to Confluence storage format conversion
9
+ - Image upload and embedding
10
+ - Page creation and updates
11
+ - Retry logic with exponential backoff
12
+ - Rate limiting
13
+ - Dry-run mode
14
+ - Config file support
15
+
16
+ Usage:
17
+ ./md2confluence.py # Sync all configured docs
18
+ ./md2confluence.py --dry-run # Preview without changes
19
+ ./md2confluence.py --list # List configured documents
20
+ ./md2confluence.py --verify # Verify config and connectivity
21
+ ./md2confluence.py --single "Title" path.md parent_id
22
+
23
+ Environment Variables:
24
+ CONFLUENCE_API_TOKEN API token (required)
25
+ CONFLUENCE_USER_EMAIL User email (required for basic auth)
26
+ CONFLUENCE_AUTH_MODE Auth mode: basic, bearer, or auto (default: auto)
27
+ """
28
+
29
+ import argparse
30
+ import html
31
+ import json
32
+ import os
33
+ import re
34
+ import sys
35
+ import time
36
+ from dataclasses import dataclass, field
37
+ from pathlib import Path
38
+ from typing import Dict, List, Optional, Tuple
39
+ from urllib.parse import quote
40
+
41
+ import requests
42
+
43
+ # =============================================================================
44
+ # CONFIGURATION
45
+ # =============================================================================
46
+
47
+ REPO_URL = os.environ.get("CONFLUENCE_REPO_URL", "")
48
+
49
+
50
+ # =============================================================================
51
+ # EMOJI MAPPING
52
+ # =============================================================================
53
+
54
+ # Common emoji shortcodes -> Unicode
55
+ EMOJI_MAP = {
56
+ # Smileys
57
+ "smile": "😄", "grinning": "😀", "laughing": "😆", "blush": "😊",
58
+ "smiley": "😃", "relaxed": "☺️", "smirk": "😏", "heart_eyes": "😍",
59
+ "kissing_heart": "😘", "kissing": "😗", "wink": "😉", "stuck_out_tongue": "😛",
60
+ "stuck_out_tongue_winking_eye": "😜", "stuck_out_tongue_closed_eyes": "😝",
61
+ "disappointed": "😞", "worried": "😟", "angry": "😠", "rage": "😡",
62
+ "cry": "😢", "sob": "😭", "fearful": "😨", "weary": "😩",
63
+ "sleepy": "😪", "tired_face": "😫", "grimacing": "😬", "scream": "😱",
64
+ "flushed": "😳", "dizzy_face": "😵", "mask": "😷", "sunglasses": "😎",
65
+ "confused": "😕", "neutral_face": "😐", "expressionless": "😑",
66
+ "unamused": "😒", "sweat": "😓", "pensive": "😔", "confounded": "😖",
67
+ "kissing_closed_eyes": "😚", "kissing_smiling_eyes": "😙",
68
+ "relieved": "😌", "satisfied": "😆", "grin": "😁", "joy": "😂",
69
+ "innocent": "😇", "imp": "😈", "smiling_imp": "😈", "wink2": "😉",
70
+ "yum": "😋", "triumph": "😤", "sleeping": "😴", "thinking": "🤔",
71
+ "drooling_face": "🤤", "lying_face": "🤥", "hugging": "🤗",
72
+ "zipper_mouth": "🤐", "money_mouth": "🤑", "nerd": "🤓",
73
+ "face_with_thermometer": "🤒", "face_with_head_bandage": "🤕",
74
+ "nauseated_face": "🤢", "sneezing_face": "🤧", "cowboy": "🤠",
75
+ "clown": "🤡", "rofl": "🤣", "upside_down": "🙃", "slightly_smiling_face": "🙂",
76
+ # Gestures
77
+ "thumbsup": "👍", "+1": "👍", "thumbsdown": "👎", "-1": "👎",
78
+ "ok_hand": "👌", "punch": "👊", "fist": "✊", "v": "✌️",
79
+ "wave": "👋", "hand": "✋", "raised_hand": "✋", "open_hands": "👐",
80
+ "point_up": "☝️", "point_down": "👇", "point_left": "👈", "point_right": "👉",
81
+ "raised_hands": "🙌", "pray": "🙏", "clap": "👏", "muscle": "💪",
82
+ "metal": "🤘", "middle_finger": "🖕", "writing_hand": "✍️",
83
+ # Hearts & Symbols
84
+ "heart": "❤️", "yellow_heart": "💛", "green_heart": "💚",
85
+ "blue_heart": "💙", "purple_heart": "💜", "black_heart": "🖤",
86
+ "broken_heart": "💔", "heartbeat": "💓", "heartpulse": "💗",
87
+ "two_hearts": "💕", "sparkling_heart": "💖", "cupid": "💘",
88
+ "gift_heart": "💝", "revolving_hearts": "💞", "heart_decoration": "💟",
89
+ "star": "⭐", "star2": "🌟", "sparkles": "✨", "zap": "⚡",
90
+ "fire": "🔥", "boom": "💥", "collision": "💥", "sweat_drops": "💦",
91
+ "droplet": "💧", "dash": "💨", "cloud": "☁️", "sun": "☀️",
92
+ "tada": "🎉", "confetti_ball": "🎊", "balloon": "🎈", "gift": "🎁",
93
+ "ribbon": "🎀", "party_popper": "🎉",
94
+ # Objects
95
+ "bulb": "💡", "flashlight": "🔦", "wrench": "🔧", "hammer": "🔨",
96
+ "nut_and_bolt": "🔩", "gear": "⚙️", "link": "🔗", "chains": "⛓️",
97
+ "lock": "🔒", "unlock": "🔓", "key": "🔑", "bell": "🔔",
98
+ "bookmark": "🔖", "clipboard": "📋", "pushpin": "📌", "paperclip": "📎",
99
+ "scissors": "✂️", "pencil": "✏️", "pencil2": "✏️", "pen": "🖊️",
100
+ "memo": "📝", "file_folder": "📁", "open_file_folder": "📂",
101
+ "calendar": "📅", "date": "📅", "chart": "📊", "chart_with_upwards_trend": "📈",
102
+ "chart_with_downwards_trend": "📉", "bar_chart": "📊",
103
+ # Status indicators
104
+ "white_check_mark": "✅", "check": "✔️", "heavy_check_mark": "✔️",
105
+ "x": "❌", "cross_mark": "❌", "negative_squared_cross_mark": "❎",
106
+ "warning": "⚠️", "no_entry": "⛔", "no_entry_sign": "🚫",
107
+ "construction": "🚧", "rotating_light": "🚨",
108
+ "question": "❓", "grey_question": "❔", "exclamation": "❗",
109
+ "grey_exclamation": "❕", "bangbang": "‼️", "interrobang": "⁉️",
110
+ # Arrows
111
+ "arrow_up": "⬆️", "arrow_down": "⬇️", "arrow_left": "⬅️", "arrow_right": "➡️",
112
+ "arrow_upper_left": "↖️", "arrow_upper_right": "↗️",
113
+ "arrow_lower_left": "↙️", "arrow_lower_right": "↘️",
114
+ "left_right_arrow": "↔️", "arrow_up_down": "↕️",
115
+ "arrows_counterclockwise": "🔄", "arrows_clockwise": "🔃",
116
+ # Misc
117
+ "rocket": "🚀", "airplane": "✈️", "car": "🚗", "taxi": "🚕",
118
+ "bus": "🚌", "ambulance": "🚑", "fire_engine": "🚒", "police_car": "🚓",
119
+ "bike": "🚲", "ship": "🚢", "anchor": "⚓", "hourglass": "⌛",
120
+ "watch": "⌚", "alarm_clock": "⏰", "stopwatch": "⏱️", "timer": "⏲️",
121
+ "trophy": "🏆", "medal": "🏅", "1st_place_medal": "🥇",
122
+ "2nd_place_medal": "🥈", "3rd_place_medal": "🥉",
123
+ "soccer": "⚽", "basketball": "🏀", "football": "🏈", "baseball": "⚾",
124
+ "tennis": "🎾", "golf": "⛳", "bug": "🐛", "ant": "🐜", "bee": "🐝",
125
+ "beetle": "🐞", "snail": "🐌", "octopus": "🐙", "shell": "🐚",
126
+ "cat": "🐱", "dog": "🐶", "wolf": "🐺", "fox": "🦊", "bear": "🐻",
127
+ "panda": "🐼", "koala": "🐨", "tiger": "🐯", "lion": "🦁",
128
+ "cow": "🐮", "pig": "🐷", "frog": "🐸", "monkey": "🐵",
129
+ "chicken": "🐔", "penguin": "🐧", "bird": "🐦", "eagle": "🦅",
130
+ "duck": "🦆", "owl": "🦉", "bat": "🦇", "shark": "🦈",
131
+ "whale": "🐳", "dolphin": "🐬", "fish": "🐟", "tropical_fish": "🐠",
132
+ "turtle": "🐢", "snake": "🐍", "dragon": "🐉", "unicorn": "🦄",
133
+ "crab": "🦀", "shrimp": "🦐", "squid": "🦑", "butterfly": "🦋",
134
+ "rose": "🌹", "tulip": "🌷", "sunflower": "🌻", "hibiscus": "🌺",
135
+ "cherry_blossom": "🌸", "bouquet": "💐", "seedling": "🌱",
136
+ "evergreen_tree": "🌲", "deciduous_tree": "🌳", "palm_tree": "🌴",
137
+ "cactus": "🌵", "herb": "🌿", "shamrock": "☘️", "four_leaf_clover": "🍀",
138
+ "maple_leaf": "🍁", "fallen_leaf": "🍂", "leaves": "🍃",
139
+ "apple": "🍎", "green_apple": "🍏", "pear": "🍐", "tangerine": "🍊",
140
+ "lemon": "🍋", "banana": "🍌", "watermelon": "🍉", "grapes": "🍇",
141
+ "strawberry": "🍓", "melon": "🍈", "cherries": "🍒", "peach": "🍑",
142
+ "pineapple": "🍍", "kiwi": "🥝", "avocado": "🥑", "tomato": "🍅",
143
+ "eggplant": "🍆", "cucumber": "🥒", "carrot": "🥕", "corn": "🌽",
144
+ "hot_pepper": "🌶️", "potato": "🥔", "sweet_potato": "🍠",
145
+ "chestnut": "🌰", "peanuts": "🥜", "bread": "🍞", "croissant": "🥐",
146
+ "pizza": "🍕", "hamburger": "🍔", "fries": "🍟", "hotdog": "🌭",
147
+ "taco": "🌮", "burrito": "🌯", "egg": "🥚", "cooking": "🍳",
148
+ "coffee": "☕", "tea": "🍵", "sake": "🍶", "beer": "🍺", "beers": "🍻",
149
+ "wine_glass": "🍷", "cocktail": "🍸", "tropical_drink": "🍹",
150
+ "champagne": "🍾", "ice_cream": "🍨", "shaved_ice": "🍧",
151
+ "cake": "🍰", "birthday": "🎂", "cookie": "🍪", "chocolate_bar": "🍫",
152
+ "candy": "🍬", "lollipop": "🍭", "doughnut": "🍩", "popcorn": "🍿",
153
+ }
154
+
155
+
156
+ # =============================================================================
157
+ # FRONT-MATTER PARSING
158
+ # =============================================================================
159
+
160
+ @dataclass
161
+ class DocumentMeta:
162
+ """Metadata extracted from document front-matter."""
163
+ title: Optional[str] = None
164
+ tags: List[str] = field(default_factory=list)
165
+ space_key: Optional[str] = None
166
+ parent_id: Optional[str] = None
167
+ add_disclaimer: Optional[bool] = None
168
+ max_image_width: Optional[int] = None
169
+ # Edge case handling overrides
170
+ image_failure_behavior: Optional[str] = None
171
+ title_special_chars: Optional[str] = None
172
+ # Sync control
173
+ synchronized: bool = True
174
+
175
+
176
+ def parse_front_matter(content: str) -> Tuple[DocumentMeta, str]:
177
+ """Parse YAML/JSON front-matter from markdown content.
178
+
179
+ Front-matter must be at the start of the file, delimited by '---'.
180
+ Returns (metadata, content_without_frontmatter).
181
+ """
182
+ meta = DocumentMeta()
183
+
184
+ # Check for front-matter delimiter
185
+ if not content.startswith('---'):
186
+ return meta, content
187
+
188
+ # Find closing delimiter
189
+ lines = content.split('\n')
190
+ end_idx = None
191
+ for i, line in enumerate(lines[1:], start=1):
192
+ if line.strip() == '---':
193
+ end_idx = i
194
+ break
195
+
196
+ if end_idx is None:
197
+ return meta, content
198
+
199
+ # Extract front-matter block
200
+ fm_lines = lines[1:end_idx]
201
+ fm_text = '\n'.join(fm_lines)
202
+ remaining_content = '\n'.join(lines[end_idx + 1:]).lstrip('\n')
203
+
204
+ # Try JSON first, then YAML-like parsing
205
+ fm_data = {}
206
+ try:
207
+ fm_data = json.loads(fm_text)
208
+ except json.JSONDecodeError:
209
+ # Simple YAML-like parsing (key: value)
210
+ for line in fm_lines:
211
+ line = line.strip()
212
+ if not line or line.startswith('#'):
213
+ continue
214
+ if ':' in line:
215
+ key, _, value = line.partition(':')
216
+ key = key.strip()
217
+ value = value.strip()
218
+
219
+ # Handle arrays: [item1, item2] or - item
220
+ if value.startswith('[') and value.endswith(']'):
221
+ # Inline array
222
+ items = value[1:-1].split(',')
223
+ fm_data[key] = [item.strip().strip('"\'') for item in items if item.strip()]
224
+ elif value.lower() in ('true', 'false'):
225
+ fm_data[key] = value.lower() == 'true'
226
+ elif value.isdigit():
227
+ fm_data[key] = int(value)
228
+ elif value.startswith('"') and value.endswith('"'):
229
+ fm_data[key] = value[1:-1]
230
+ elif value.startswith("'") and value.endswith("'"):
231
+ fm_data[key] = value[1:-1]
232
+ elif value:
233
+ fm_data[key] = value
234
+
235
+ # Map front-matter fields to DocumentMeta
236
+ if 'title' in fm_data:
237
+ meta.title = str(fm_data['title'])
238
+ if 'tags' in fm_data:
239
+ if isinstance(fm_data['tags'], list):
240
+ meta.tags = [str(t) for t in fm_data['tags']]
241
+ else:
242
+ meta.tags = [str(fm_data['tags'])]
243
+ if 'labels' in fm_data: # Alias for tags
244
+ if isinstance(fm_data['labels'], list):
245
+ meta.tags.extend([str(t) for t in fm_data['labels']])
246
+ else:
247
+ meta.tags.append(str(fm_data['labels']))
248
+ if 'space' in fm_data or 'space_key' in fm_data:
249
+ meta.space_key = str(fm_data.get('space') or fm_data.get('space_key'))
250
+ if 'parent_id' in fm_data or 'parent' in fm_data:
251
+ meta.parent_id = str(fm_data.get('parent_id') or fm_data.get('parent'))
252
+ if 'add_disclaimer' in fm_data:
253
+ meta.add_disclaimer = bool(fm_data['add_disclaimer'])
254
+ elif 'disclaimer' in fm_data:
255
+ meta.add_disclaimer = bool(fm_data['disclaimer'])
256
+ if 'max_image_width' in fm_data:
257
+ meta.max_image_width = int(fm_data['max_image_width'])
258
+ if 'image_failure_behavior' in fm_data:
259
+ meta.image_failure_behavior = str(fm_data['image_failure_behavior'])
260
+ if 'title_special_chars' in fm_data:
261
+ meta.title_special_chars = str(fm_data['title_special_chars'])
262
+ if 'synchronized' in fm_data or 'sync' in fm_data:
263
+ val = fm_data.get('synchronized', fm_data.get('sync', True))
264
+ meta.synchronized = bool(val)
265
+
266
+ return meta, remaining_content
267
+
268
+
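For reference, a front-matter block that this parser accepts could look like the following; the field names are the ones the parser maps onto DocumentMeta, while the values are hypothetical:

    ---
    title: "Deployment Guide"
    tags: [ops, runbook]
    parent_id: "123456"
    add_disclaimer: true
    max_image_width: 600
    synchronized: true
    ---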
269
+ @dataclass
270
+ class Config:
271
+ """Configuration for Confluence sync."""
272
+ # Connection
273
+ base_url: str = ""
274
+ space_key: str = ""
275
+ auth_mode: str = "auto" # basic, bearer, or auto
276
+ api_token: str = ""
277
+ user_email: str = ""
278
+
279
+ # Parent pages
280
+ tech_parent_id: str = ""
281
+ user_parent_id: str = ""
282
+
283
+ # Behavior
284
+ max_retries: int = 3
285
+ retry_delay: int = 5
286
+ timeout: int = 30
287
+ rate_limit_ms: int = 100
288
+ add_disclaimer: bool = True
289
+ max_image_width: int = 800
290
+
291
+ # Edge case handling
292
+ existing_page_behavior: str = "update" # update, skip, fail
293
+ missing_parent_behavior: str = "fail" # fail, create
294
+ missing_file_behavior: str = "skip" # skip, fail
295
+ image_failure_behavior: str = "placeholder" # placeholder, skip, fail
296
+ title_special_chars: str = "sanitize" # sanitize, encode, fail
297
+ title_strip_pattern: str = r'[<>:"/\\|?*]'
298
+
299
+ # Document mappings: key -> (title, path, parent_id)
300
+ documents: Dict[str, Tuple[str, str, str]] = field(default_factory=dict)
301
+
302
+ # Runtime
303
+ dry_run: bool = False
304
+ verbose: bool = False
305
+
306
+ # Cached values
307
+ _space_id: str = ""
308
+
309
+
310
+ def load_config(config_file: str = ".confluence-sync.conf") -> Config:
311
+ """Load configuration from shell-style config file."""
312
+ global REPO_URL
313
+ config = Config()
314
+
315
+ # Track config sources for reporting
316
+ config_sources = {}
317
+
318
+ # Load from environment first
319
+ env_vars = {
320
+ "api_token": "CONFLUENCE_API_TOKEN",
321
+ "user_email": "CONFLUENCE_USER_EMAIL",
322
+ "auth_mode": "CONFLUENCE_AUTH_MODE",
323
+ "base_url": "CONFLUENCE_BASE_URL",
324
+ "space_key": "CONFLUENCE_SPACE_KEY",
325
+ "tech_parent_id": "CONFLUENCE_TECH_PARENT_ID",
326
+ "user_parent_id": "CONFLUENCE_USER_PARENT_ID",
327
+ }
328
+
329
+ for attr, env_name in env_vars.items():
330
+ value = os.environ.get(env_name, "")
331
+ if value:
332
+ setattr(config, attr, value)
333
+ config_sources[attr] = "env"
334
+
335
+ # Default auth_mode if not set
336
+ if not config.auth_mode:
337
+ config.auth_mode = "auto"
338
+
339
+ # Parse config file if it exists
340
+ config_path = Path(config_file)
341
+ if config_path.exists():
342
+ log_info(f"Config file found: {config_file}")
343
+ content = config_path.read_text()
344
+
345
+ # Build a local variable context for resolving references
346
+ local_vars = {
347
+ "CONFLUENCE_TECH_PARENT_ID": config.tech_parent_id,
348
+ "CONFLUENCE_USER_PARENT_ID": config.user_parent_id,
349
+ }
350
+
351
+ # Two-pass parsing: first pass gets base values, second pass resolves references
352
+ doc_lines = [] # Store doc lines for second pass
353
+
354
+ # Parse shell variable assignments
355
+ for line in content.split('\n'):
356
+ line = line.strip()
357
+ if not line or line.startswith('#'):
358
+ continue
359
+
360
+ # Handle variable assignments: VAR="value" or VAR="${VAR:-default}"
361
+ match = re.match(r'^([A-Z_][A-Z0-9_]*)=(.+)$', line)
362
+ if match:
363
+ var_name = match.group(1)
364
+ var_value = match.group(2).strip('"\'')
365
+
366
+ # Resolve ${VAR:-default} patterns using env + local vars
367
+ def resolve_default(m):
368
+ env_var = m.group(1)
369
+ default = m.group(2)
370
+ return os.environ.get(env_var, local_vars.get(env_var, default))
371
+
372
+ var_value = re.sub(r'\$\{([^:}]+):-([^}]*)\}', resolve_default, var_value)
373
+ var_value = re.sub(r'\$\{([^}]+)\}', lambda m: os.environ.get(m.group(1), local_vars.get(m.group(1), '')), var_value)
374
+
375
+ # Store document mappings for second pass
376
+ if var_name.startswith("CONFLUENCE_DOC_"):
377
+ doc_lines.append((var_name, var_value))
378
+ continue
379
+
380
+ # Update local vars for reference resolution
381
+ local_vars[var_name] = var_value
382
+
383
+ # Map to config attributes (only override if not set from env)
384
+ if var_name == "CONFLUENCE_API_TOKEN" and "api_token" not in config_sources:
385
+ config.api_token = var_value
386
+ config_sources["api_token"] = "config"
387
+ elif var_name == "CONFLUENCE_USER_EMAIL" and "user_email" not in config_sources:
388
+ config.user_email = var_value
389
+ config_sources["user_email"] = "config"
390
+ elif var_name == "CONFLUENCE_BASE_URL" and "base_url" not in config_sources:
391
+ config.base_url = var_value
392
+ config_sources["base_url"] = "config"
393
+ elif var_name == "CONFLUENCE_SPACE_KEY" and "space_key" not in config_sources:
394
+ config.space_key = var_value
395
+ config_sources["space_key"] = "config"
396
+ elif var_name == "CONFLUENCE_AUTH_MODE" and "auth_mode" not in config_sources:
397
+ config.auth_mode = var_value
398
+ config_sources["auth_mode"] = "config"
399
+ elif var_name == "CONFLUENCE_REPO_URL":
400
+ REPO_URL = var_value
401
+ elif var_name == "CONFLUENCE_TECH_PARENT_ID" and "tech_parent_id" not in config_sources:
402
+ config.tech_parent_id = var_value
403
+ config_sources["tech_parent_id"] = "config"
404
+ local_vars[var_name] = var_value
405
+ elif var_name == "CONFLUENCE_USER_PARENT_ID" and "user_parent_id" not in config_sources:
406
+ config.user_parent_id = var_value
407
+ config_sources["user_parent_id"] = "config"
408
+ local_vars[var_name] = var_value
409
+ elif var_name == "CONFLUENCE_MAX_RETRIES":
410
+ config.max_retries = int(var_value)
411
+ elif var_name == "CONFLUENCE_RETRY_DELAY":
412
+ config.retry_delay = int(var_value)
413
+ elif var_name == "CONFLUENCE_TIMEOUT":
414
+ config.timeout = int(var_value)
415
+ elif var_name == "CONFLUENCE_RATE_LIMIT_MS":
416
+ config.rate_limit_ms = int(var_value)
417
+ elif var_name == "CONFLUENCE_ADD_DISCLAIMER":
418
+ config.add_disclaimer = var_value.lower() == "true"
419
+ elif var_name == "CONFLUENCE_MAX_IMAGE_WIDTH":
420
+ config.max_image_width = int(var_value)
421
+ elif var_name == "CONFLUENCE_EXISTING_PAGE_BEHAVIOR":
422
+ config.existing_page_behavior = var_value
423
+ elif var_name == "CONFLUENCE_MISSING_PARENT_BEHAVIOR":
424
+ config.missing_parent_behavior = var_value
425
+ elif var_name == "CONFLUENCE_MISSING_FILE_BEHAVIOR":
426
+ config.missing_file_behavior = var_value
427
+ elif var_name == "CONFLUENCE_IMAGE_FAILURE_BEHAVIOR":
428
+ config.image_failure_behavior = var_value
429
+ elif var_name == "CONFLUENCE_TITLE_SPECIAL_CHARS":
430
+ config.title_special_chars = var_value
431
+ elif var_name == "CONFLUENCE_TITLE_STRIP_PATTERN":
432
+ config.title_strip_pattern = var_value
433
+
434
+ # Second pass: process document mappings with resolved parent IDs
435
+ for var_name, var_value in doc_lines:
436
+ parts = var_value.split('|')
437
+ if len(parts) == 3:
438
+ doc_key = var_name[15:] # Remove CONFLUENCE_DOC_ prefix
439
+ title, path, parent_id = parts
440
+ # Resolve parent_id references using local_vars
441
+ if parent_id.startswith("${") and parent_id.endswith("}"):
442
+ ref_var = parent_id[2:-1]
443
+ parent_id = local_vars.get(ref_var, config.tech_parent_id)
444
+ config.documents[doc_key] = (title, path, parent_id)
445
+
446
+ log_info(f" Loaded {len(config.documents)} document mapping(s) from config")
447
+ else:
448
+ log_warn(f"Config file not found: {config_file}")
449
+ log_info(" Using environment variables only")
450
+
451
+ # Log config sources
452
+ if config_sources:
453
+ env_count = sum(1 for v in config_sources.values() if v == "env")
454
+ conf_count = sum(1 for v in config_sources.values() if v == "config")
455
+ if env_count:
456
+ log_debug(f" {env_count} setting(s) from environment variables")
457
+ if conf_count:
458
+ log_debug(f" {conf_count} setting(s) from config file")
459
+
460
+ # Auto-detect auth mode
461
+ if config.auth_mode == "auto":
462
+ config.auth_mode = "basic" if config.user_email else "bearer"
463
+
464
+ return config
465
+
466
+
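As an illustration of the shell-style format this loader understands, a minimal .confluence-sync.conf might read as follows; the variable names are the ones recognized above, while the URL, space key, and IDs are placeholders:

    # .confluence-sync.conf
    CONFLUENCE_BASE_URL="https://example.atlassian.net/wiki"
    CONFLUENCE_SPACE_KEY="DOCS"
    CONFLUENCE_AUTH_MODE="basic"
    CONFLUENCE_TECH_PARENT_ID="111111"
    CONFLUENCE_DOC_README="README|README.md|${CONFLUENCE_TECH_PARENT_ID}"

Each CONFLUENCE_DOC_* entry is a Title|path|parent_id triple, and ${VAR} references are resolved against the environment and earlier assignments in the file.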
467
+ # =============================================================================
468
+ # LOGGING
469
+ # =============================================================================
470
+
471
+ # ANSI colors (disabled if not a terminal)
472
+ if sys.stdout.isatty():
473
+ RED = '\033[0;31m'
474
+ GREEN = '\033[0;32m'
475
+ YELLOW = '\033[1;33m'
476
+ BLUE = '\033[0;34m'
477
+ NC = '\033[0m'
478
+ else:
479
+ RED = GREEN = YELLOW = BLUE = NC = ''
480
+
481
+ _verbose = False
482
+
483
+ def log_info(msg: str):
484
+ print(f"{GREEN}[INFO]{NC} {msg}")
485
+
486
+ def log_warn(msg: str):
487
+ print(f"{YELLOW}[WARN]{NC} {msg}")
488
+
489
+ def log_error(msg: str):
490
+ print(f"{RED}[ERROR]{NC} {msg}", file=sys.stderr)
491
+
492
+ def log_debug(msg: str):
493
+ if _verbose:
494
+ print(f"{BLUE}[DEBUG]{NC} {msg}", file=sys.stderr)
495
+
496
+
497
+ # =============================================================================
498
+ # MARKDOWN TO CONFLUENCE CONVERSION
499
+ # =============================================================================
500
+
501
+ def escape_html(text: str) -> str:
502
+ """Escape HTML special characters."""
503
+ return html.escape(text, quote=False)
504
+
505
+
506
+ def convert_code_blocks(content: str) -> str:
507
+ """Convert fenced code blocks to Confluence code macro."""
508
+ def replace_code_block(match):
509
+ lang = match.group(1) or "text"
510
+ code = match.group(2)
511
+ lang_map = {
512
+ "sh": "bash", "shell": "bash", "js": "javascript",
513
+ "ts": "typescript", "py": "python", "yml": "yaml", "": "text"
514
+ }
515
+ lang = lang_map.get(lang.lower(), lang.lower())
516
+ code = escape_html(code.strip())
517
+
518
+ return f'''<ac:structured-macro ac:name="code" ac:schema-version="1">
519
+ <ac:parameter ac:name="language">{lang}</ac:parameter>
520
+ <ac:parameter ac:name="theme">Confluence</ac:parameter>
521
+ <ac:parameter ac:name="linenumbers">true</ac:parameter>
522
+ <ac:plain-text-body><![CDATA[{code}]]></ac:plain-text-body>
523
+ </ac:structured-macro>'''
524
+
525
+ return re.sub(r'```(\w*)\n(.*?)```', replace_code_block, content, flags=re.DOTALL)
526
+
527
+
528
+ def convert_inline_code(content: str) -> str:
529
+ """Convert inline code to <code> tags."""
530
+ return re.sub(r'`([^`]+)`', r'<code>\1</code>', content)
531
+
532
+
533
+ def convert_info_panels(content: str) -> str:
534
+ """Convert blockquotes to Confluence format."""
535
+ PANEL_PATTERNS = [
536
+ (r'^\*{0,2}note:?\*{0,2}\s*', 'info'),
537
+ (r'^\*{0,2}info:?\*{0,2}\s*', 'info'),
538
+ (r'^\*{0,2}warning:?\*{0,2}\s*', 'warning'),
539
+ (r'^\*{0,2}caution:?\*{0,2}\s*', 'warning'),
540
+ (r'^\*{0,2}danger:?\*{0,2}\s*', 'warning'),
541
+ (r'^\*{0,2}tip:?\*{0,2}\s*', 'tip'),
542
+ (r'^\*{0,2}hint:?\*{0,2}\s*', 'tip'),
543
+ (r'^\*{0,2}important:?\*{0,2}\s*', 'note'),
544
+ (r'^\*{0,2}security\s+tip:?\*{0,2}\s*', 'warning'),
545
+ (r'^\*{0,2}pro\s+tip:?\*{0,2}\s*', 'tip'),
546
+ ]
547
+
548
+ def detect_panel(text: str) -> Tuple[Optional[str], str]:
549
+ text_stripped = text.strip()
550
+ text_lower = text_stripped.lower()
551
+ for pattern, panel_type in PANEL_PATTERNS:
552
+ match = re.match(pattern, text_lower, re.IGNORECASE)
553
+ if match:
554
+ remaining = text_stripped[match.end():].strip()
555
+ return (panel_type, remaining)
556
+ return (None, text)
557
+
558
+ def make_panel(panel_type: str, text: str) -> str:
559
+ return f'''<ac:structured-macro ac:name="{panel_type}" ac:schema-version="1">
560
+ <ac:parameter ac:name="icon">true</ac:parameter>
561
+ <ac:rich-text-body><p>{text}</p></ac:rich-text-body>
562
+ </ac:structured-macro>'''
563
+
564
+ def make_blockquote(text: str) -> str:
565
+ return f'<blockquote><p>{text}</p></blockquote>'
566
+
567
+ lines = content.split('\n')
568
+ result = []
569
+ blockquote_lines = []
570
+
571
+ for line in lines:
572
+ if line.startswith('> '):
573
+ blockquote_lines.append(line[2:])
574
+ elif line.startswith('>') and len(line) > 0:
575
+ blockquote_lines.append(line[1:].lstrip())
576
+ else:
577
+ if blockquote_lines:
578
+ quote_text = ' '.join(blockquote_lines).strip()
579
+ panel_type, remaining_text = detect_panel(quote_text)
580
+ if panel_type:
581
+ result.append(make_panel(panel_type, remaining_text))
582
+ else:
583
+ result.append(make_blockquote(quote_text))
584
+ blockquote_lines = []
585
+ result.append(line)
586
+
587
+ if blockquote_lines:
588
+ quote_text = ' '.join(blockquote_lines).strip()
589
+ panel_type, remaining_text = detect_panel(quote_text)
590
+ if panel_type:
591
+ result.append(make_panel(panel_type, remaining_text))
592
+ else:
593
+ result.append(make_blockquote(quote_text))
594
+
595
+ return '\n'.join(result)
596
+
597
+
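For instance, a quote line such as "> **Note:** Back up first" is emitted as an info panel macro, "> Warning: this is destructive" becomes a warning panel, and a quote with no recognized prefix is kept as a plain blockquote.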
598
+ def convert_tables(content: str) -> str:
599
+ """Convert markdown tables to Confluence tables."""
600
+ lines = content.split('\n')
601
+ result = []
602
+ table_lines = []
603
+ in_table = False
604
+
605
+ for line in lines:
606
+ if '|' in line and line.strip().startswith('|'):
607
+ in_table = True
608
+ table_lines.append(line)
609
+ else:
610
+ if in_table and table_lines:
611
+ result.append(process_table(table_lines))
612
+ table_lines = []
613
+ in_table = False
614
+ result.append(line)
615
+
616
+ if table_lines:
617
+ result.append(process_table(table_lines))
618
+
619
+ return '\n'.join(result)
620
+
621
+
622
+ def process_table(table_lines: List[str]) -> str:
623
+ """Process a markdown table into Confluence format."""
624
+ if len(table_lines) < 2:
625
+ return '\n'.join(table_lines)
626
+
627
+ rows = []
628
+ is_header = True
629
+
630
+ for line in table_lines:
631
+ if re.match(r'^\|[\s\-:|]+\|$', line.strip()):
632
+ continue
633
+
634
+ cells = [c.strip() for c in line.strip().split('|')[1:-1]]
635
+
636
+ if is_header:
637
+ row = '<tr>' + ''.join(f'<th><p>{cell}</p></th>' for cell in cells) + '</tr>'
638
+ is_header = False
639
+ else:
640
+ row = '<tr>' + ''.join(f'<td><p>{cell}</p></td>' for cell in cells) + '</tr>'
641
+
642
+ rows.append(row)
643
+
644
+ return f'<table data-layout="default"><colgroup></colgroup><tbody>{"".join(rows)}</tbody></table>'
645
+
646
+
647
+ def convert_lists(content: str) -> str:
648
+ """Convert markdown lists to HTML lists."""
649
+ lines = content.split('\n')
650
+ result = []
651
+ list_stack = []
652
+
653
+ i = 0
654
+ while i < len(lines):
655
+ line = lines[i]
656
+
657
+ ul_match = re.match(r'^(\s*)[-*+]\s+(.+)$', line)
658
+ ol_match = re.match(r'^(\s*)(\d+)\.\s+(.+)$', line)
659
+
660
+ if ul_match:
661
+ indent = len(ul_match.group(1))
662
+ text = ul_match.group(2)
663
+ list_type = 'ul'
664
+ elif ol_match:
665
+ indent = len(ol_match.group(1))
666
+ text = ol_match.group(3)
667
+ list_type = 'ol'
668
+ else:
669
+ while list_stack:
670
+ _, lt = list_stack.pop()
671
+ result.append(f'</{lt}>')
672
+ result.append(line)
673
+ i += 1
674
+ continue
675
+
676
+ level = indent // 2
677
+
678
+ while list_stack and list_stack[-1][0] > level:
679
+ _, lt = list_stack.pop()
680
+ result.append(f'</{lt}>')
681
+
682
+ if list_stack and list_stack[-1][0] == level and list_stack[-1][1] != list_type:
683
+ _, lt = list_stack.pop()
684
+ result.append(f'</{lt}>')
685
+
686
+ if not list_stack or list_stack[-1][0] < level:
687
+ result.append(f'<{list_type}>')
688
+ list_stack.append((level, list_type))
689
+
690
+ result.append(f'<li>{text}</li>')
691
+ i += 1
692
+
693
+ while list_stack:
694
+ _, lt = list_stack.pop()
695
+ result.append(f'</{lt}>')
696
+
697
+ return '\n'.join(result)
698
+
699
+
700
+ def convert_headers(content: str) -> str:
701
+ """Convert markdown headers to HTML headers."""
702
+ for i in range(6, 0, -1):
703
+ pattern = r'^' + '#' * i + r' (.+)$'
704
+ content = re.sub(pattern, rf'<h{i}>\1</h{i}>', content, flags=re.MULTILINE)
705
+ return content
706
+
707
+
708
+ def convert_emphasis(content: str) -> str:
709
+ """Convert bold, italic, and strikethrough."""
710
+ content = re.sub(r'~~([^~]+)~~', r'<del>\1</del>', content)
711
+ content = re.sub(r'\*\*\*([^*]+)\*\*\*', r'<strong><em>\1</em></strong>', content)
712
+ content = re.sub(r'___([^_]+)___', r'<strong><em>\1</em></strong>', content)
713
+ content = re.sub(r'\*\*([^*]+)\*\*', r'<strong>\1</strong>', content)
714
+ content = re.sub(r'__([^_]+)__', r'<strong>\1</strong>', content)
715
+ content = re.sub(r'(?<![*\w])\*([^*]+)\*(?![*\w])', r'<em>\1</em>', content)
716
+ content = re.sub(r'(?<![_\w])_([^_]+)_(?![_\w])', r'<em>\1</em>', content)
717
+ return content
718
+
719
+
720
+ def convert_task_lists(content: str) -> str:
721
+ """Convert task list checkboxes to Confluence status macros."""
722
+ content = re.sub(
723
+ r'^(\s*)[-*+]\s+\[[xX]\]\s+(.+)$',
724
+ r'\1<ac:structured-macro ac:name="status"><ac:parameter ac:name="colour">Green</ac:parameter><ac:parameter ac:name="title">DONE</ac:parameter></ac:structured-macro> \2',
725
+ content, flags=re.MULTILINE
726
+ )
727
+ content = re.sub(
728
+ r'^(\s*)[-*+]\s+\[\s*\]\s+(.+)$',
729
+ r'\1<ac:structured-macro ac:name="status"><ac:parameter ac:name="colour">Grey</ac:parameter><ac:parameter ac:name="title">TODO</ac:parameter></ac:structured-macro> \2',
730
+ content, flags=re.MULTILINE
731
+ )
732
+ return content
733
+
734
+
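For example, "- [x] Ship release" is rewritten as a green DONE status macro followed by "Ship release", while "- [ ] Write changelog" becomes a grey TODO status macro followed by the item text.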
735
+ def convert_autolinks(content: str) -> str:
736
+ """Convert autolinks <url> and bare URLs to HTML links."""
737
+ content = re.sub(r'<(https?://[^>]+)>', r'<a href="\1">\1</a>', content)
738
+ content = re.sub(r'<([^@\s]+@[^>\s]+)>', r'<a href="mailto:\1">\1</a>', content)
739
+ return content
740
+
741
+
742
+ def convert_reference_links(content: str) -> str:
743
+ """Convert reference-style links [text][ref] with [ref]: url definitions."""
744
+ ref_pattern = r'^\[([^\]]+)\]:\s*(\S+)(?:\s+"([^"]*)")?\s*$'
745
+ references = {}
746
+
747
+ for match in re.finditer(ref_pattern, content, re.MULTILINE):
748
+ ref_id = match.group(1).lower()
749
+ url = match.group(2)
750
+ title = match.group(3) or ""
751
+ references[ref_id] = (url, title)
752
+
753
+ content = re.sub(ref_pattern, '', content, flags=re.MULTILINE)
754
+
755
+ def replace_ref_link(match):
756
+ text = match.group(1)
757
+ ref_id = (match.group(2) or text).lower()
758
+ if ref_id in references:
759
+ url, title = references[ref_id]
760
+ title_attr = f' title="{title}"' if title else ''
761
+ return f'<a href="{url}"{title_attr}>{text}</a>'
762
+ return match.group(0)
763
+
764
+ content = re.sub(r'\[([^\]]+)\]\[([^\]]*)\]', replace_ref_link, content)
765
+ content = re.sub(r'\[([^\]]+)\]\[\]', replace_ref_link, content)
766
+
767
+ return content
768
+
769
+
770
+ def strip_html_comments(content: str) -> str:
771
+ """Remove HTML comments from content."""
772
+ return re.sub(r'<!--[\s\S]*?-->', '', content)
773
+
774
+
775
+ def convert_links(content: str) -> str:
776
+ """Convert markdown links to HTML links (but not images)."""
777
+ return re.sub(r'(?<!!)\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', content)
778
+
779
+
780
+ def convert_horizontal_rules(content: str) -> str:
781
+ """Convert horizontal rules."""
782
+ return re.sub(r'^---+$', r'<hr />', content, flags=re.MULTILINE)
783
+
784
+
785
+ def convert_emoji(content: str) -> str:
786
+ """Convert emoji shortcodes :name: to Unicode emoji."""
787
+ def replace_emoji(match):
788
+ name = match.group(1).lower()
789
+ return EMOJI_MAP.get(name, match.group(0))
790
+
791
+ return re.sub(r':([a-zA-Z0-9_+-]+):', replace_emoji, content)
792
+
793
+
794
+ def convert_subscript_superscript(content: str) -> str:
795
+ """Convert ~subscript~ and ^superscript^ to HTML tags.
796
+
797
+ Avoids converting inside code blocks or when used for other purposes.
798
+ """
799
+ # Superscript: ^text^ (but not ^^)
800
+ content = re.sub(r'(?<!\^)\^([^\^]+)\^(?!\^)', r'<sup>\1</sup>', content)
801
+ # Subscript: ~text~ (but not ~~, which is strikethrough)
802
+ content = re.sub(r'(?<!~)~([^~]+)~(?!~)', r'<sub>\1</sub>', content)
803
+ return content
804
+
805
+
806
+ def remove_extra_blank_lines(content: str) -> str:
807
+ """Remove extra blank lines."""
808
+ content = re.sub(r'\n\n+(<h[1-6]>)', r'\n\1', content)
809
+ content = re.sub(r'(</h[1-6]>)\n\n+', r'\1\n', content)
810
+ content = re.sub(r'\n\n+(<[uo]l>)', r'\n\1', content)
811
+ content = re.sub(r'(</[uo]l>)\n\n+', r'\1\n', content)
812
+ content = re.sub(r'\n\n+(<ac:structured-macro ac:name="code")', r'\n\1', content)
813
+ content = re.sub(r'(</ac:structured-macro>)\n\n+', r'\1\n', content)
814
+ content = re.sub(r'\n\n+(<table)', r'\n\1', content)
815
+ content = re.sub(r'(</table>)\n\n+', r'\1\n', content)
816
+ content = re.sub(r'\n{3,}', r'\n\n', content)
817
+ return content
818
+
819
+
820
+ def wrap_paragraphs(content: str) -> str:
821
+ """Wrap plain text lines in <p> tags."""
822
+ lines = content.split('\n')
823
+ result = []
824
+
825
+ for line in lines:
826
+ stripped = line.strip()
827
+ if not stripped:
828
+ result.append('')
829
+ continue
830
+ if stripped.startswith('<') or stripped.startswith('<!--'):
831
+ result.append(line)
832
+ continue
833
+ result.append(f'<p>{stripped}</p>')
834
+
835
+ return '\n'.join(result)
836
+
837
+
838
+ def add_disclaimer(content: str, md_file: str) -> str:
839
+ """Add a disclaimer panel at the bottom of the content."""
840
+ repo_url = REPO_URL
841
+ if repo_url:
842
+ source_link = f'<a href="{repo_url}/src/main/{md_file}">{md_file}</a>'
843
+ else:
844
+ source_link = f'<code>{md_file}</code>'
845
+
846
+ disclaimer = f'''
847
+ <ac:structured-macro ac:name="note" ac:schema-version="1">
848
+ <ac:parameter ac:name="icon">true</ac:parameter>
849
+ <ac:parameter ac:name="title">Auto-Generated Content</ac:parameter>
850
+ <ac:rich-text-body>
851
+ <p>This page is automatically generated from {source_link} in the repository.
852
+ <strong>Do not edit this page directly</strong> - changes will be overwritten on the next sync.
853
+ To update this content, modify the source file and commit to the repository.</p>
854
+ </ac:rich-text-body>
855
+ </ac:structured-macro>'''
856
+ return content + disclaimer
857
+
858
+
859
+ def fix_image_sizing(content: str, max_width: int = 800) -> str:
860
+ """Add width constraint to images."""
861
+ content = re.sub(
862
+ r'<ac:image ac:alt="([^"]*)">\s*<ri:attachment ri:filename="([^"]*)" />\s*</ac:image>',
863
+ rf'<ac:image ac:width="{max_width}" ac:alt="\1"><ri:attachment ri:filename="\2" /></ac:image>',
864
+ content
865
+ )
866
+ return content
867
+
868
+
869
+ def convert_markdown_to_confluence(content: str, md_file: str = "", config: Optional[Config] = None,
870
+ doc_meta: Optional[DocumentMeta] = None) -> str:
871
+ """Convert markdown content to Confluence storage format."""
872
+ content = strip_html_comments(content)
873
+ content = convert_code_blocks(content)
874
+ content = convert_tables(content)
875
+ content = convert_info_panels(content)
876
+ content = convert_task_lists(content)
877
+ content = convert_lists(content)
878
+ content = convert_headers(content)
879
+ content = convert_emphasis(content)
880
+ content = convert_subscript_superscript(content)
881
+ content = convert_inline_code(content)
882
+ content = convert_emoji(content)
883
+ content = convert_reference_links(content)
884
+ content = convert_autolinks(content)
885
+ content = convert_links(content)
886
+ content = convert_horizontal_rules(content)
887
+
888
+ # Determine max_image_width (doc_meta overrides config)
889
+ max_width = 800
890
+ if config:
891
+ max_width = config.max_image_width
892
+ if doc_meta and doc_meta.max_image_width is not None:
893
+ max_width = doc_meta.max_image_width
894
+
895
+ content = fix_image_sizing(content, max_width)
896
+ content = remove_extra_blank_lines(content)
897
+ content = wrap_paragraphs(content)
898
+
899
+ # Determine whether to add disclaimer (doc_meta overrides config)
900
+ should_add_disclaimer = True
901
+ if config:
902
+ should_add_disclaimer = config.add_disclaimer
903
+ if doc_meta and doc_meta.add_disclaimer is not None:
904
+ should_add_disclaimer = doc_meta.add_disclaimer
905
+
906
+ if md_file and should_add_disclaimer:
907
+ content = add_disclaimer(content, md_file)
908
+
909
+ return content
910
+
911
+
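A minimal offline usage sketch of this pipeline, assuming md2confluence.py is installed or importable from the working directory and README.md is a hypothetical input file:

    from pathlib import Path
    import md2confluence as m2c

    raw = Path("README.md").read_text()
    meta, body = m2c.parse_front_matter(raw)       # strip and read front-matter
    storage = m2c.convert_markdown_to_confluence(body, "README.md", None, meta)
    print(storage[:500])                           # Confluence storage-format XHTML

No API calls are made here; image upload and page sync are handled separately by the client and sync functions below.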
912
+ # =============================================================================
913
+ # CONFLUENCE API CLIENT
914
+ # =============================================================================
915
+
916
+ class ConfluenceClient:
917
+ """Confluence API client with retry logic and rate limiting."""
918
+
919
+ def __init__(self, config: Config):
920
+ self.config = config
921
+ self.session = requests.Session()
922
+ self._setup_auth()
923
+
924
+ def _setup_auth(self):
925
+ """Configure authentication headers."""
926
+ if self.config.auth_mode == "bearer":
927
+ self.session.headers["Authorization"] = f"Bearer {self.config.api_token}"
928
+ else:
929
+ self.session.auth = (self.config.user_email, self.config.api_token)
930
+
931
+ self.session.headers["Accept"] = "application/json"
932
+ self.session.headers["Content-Type"] = "application/json"
933
+
934
+ def _rate_limit(self):
935
+ """Apply rate limiting between requests."""
936
+ if self.config.rate_limit_ms > 0:
937
+ time.sleep(self.config.rate_limit_ms / 1000)
938
+
939
+ def _request(self, method: str, url: str, **kwargs) -> requests.Response:
940
+ """Make an HTTP request with retry logic."""
941
+ self._rate_limit()
942
+
943
+ retry_delay = self.config.retry_delay
944
+
945
+ for attempt in range(1, self.config.max_retries + 1):
946
+ try:
947
+ response = self.session.request(
948
+ method, url,
949
+ timeout=self.config.timeout,
950
+ **kwargs
951
+ )
952
+
953
+ # Retry on 5xx errors or rate limiting
954
+ if response.status_code >= 500 or response.status_code == 429:
955
+ if attempt < self.config.max_retries:
956
+ log_debug(f"Request failed (HTTP {response.status_code}), retry {attempt}/{self.config.max_retries} in {retry_delay}s...")
957
+ time.sleep(retry_delay)
958
+ retry_delay *= 2 # Exponential backoff
959
+ continue
960
+
961
+ return response
962
+
963
+ except requests.exceptions.RequestException as e:
964
+ if attempt < self.config.max_retries:
965
+ log_debug(f"Request failed ({e}), retry {attempt}/{self.config.max_retries} in {retry_delay}s...")
966
+ time.sleep(retry_delay)
967
+ retry_delay *= 2
968
+ continue
969
+ raise
970
+
971
+ return response
972
+
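With the defaults above (max_retries=3, retry_delay=5, rate_limit_ms=100), each call sleeps 0.1 s up front, and a persistently failing request backs off roughly 5 s and then 10 s before the third and final attempt returns its response or raises.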
973
+ def get_space_id(self) -> str:
974
+ """Look up space ID from space key."""
975
+ if self.config._space_id:
976
+ return self.config._space_id
977
+
978
+ log_info(f"Looking up space ID for key: {self.config.space_key}")
979
+
980
+ url = f"{self.config.base_url}/api/v2/spaces?keys={self.config.space_key}"
981
+ log_debug(f"Space lookup URL: {url}")
982
+ response = self._request("GET", url)
983
+
984
+ if response.status_code != 200:
985
+ log_error(f"Failed to look up space (HTTP {response.status_code})")
986
+ log_error(f"Response: {response.text}")
987
+ # Provide diagnostic hints for common issues
988
+ if response.status_code == 404:
989
+ log_error("Hint: 404 errors often indicate authentication issues.")
990
+ log_error(" Verify your API token is correct and not truncated.")
991
+ log_error(" Check that CONFLUENCE_USER_EMAIL matches the token owner.")
992
+ elif response.status_code == 401:
993
+ log_error("Hint: Authentication failed. Check your API token.")
994
+ elif response.status_code == 403:
995
+ log_error("Hint: Access denied. Check your permissions for this space.")
996
+ sys.exit(2)
997
+
998
+ data = response.json()
999
+ results = data.get("results", [])
1000
+
1001
+ if not results:
1002
+ log_error(f"Could not find space with key: {self.config.space_key}")
1003
+ sys.exit(2)
1004
+
1005
+ self.config._space_id = results[0]["id"]
1006
+ log_info(f"Space ID: {self.config._space_id}")
1007
+ return self.config._space_id
1008
+
1009
+ def verify_parent_page(self, parent_id: str, parent_name: str) -> bool:
1010
+ """Verify a parent page exists."""
1011
+ if not parent_id:
1012
+ log_error(f"Parent page ID is empty for: {parent_name}")
1013
+ return False
1014
+
1015
+ log_debug(f"Verifying parent page exists: {parent_id} ({parent_name})")
1016
+
1017
+ url = f"{self.config.base_url}/api/v2/pages/{parent_id}"
1018
+ response = self._request("GET", url)
1019
+
1020
+ if response.status_code == 200:
1021
+ data = response.json()
1022
+ actual_title = data.get("title", "")
1023
+ log_debug(f"Parent page verified: {actual_title} (ID: {parent_id})")
1024
+ return True
1025
+ elif response.status_code == 404:
1026
+ log_error(f"Parent page not found: {parent_id} ({parent_name})")
1027
+ return False
1028
+ else:
1029
+ log_error(f"Failed to verify parent page (HTTP {response.status_code}): {parent_id}")
1030
+ return False
1031
+
1032
+ def get_page_id(self, title: str) -> Optional[str]:
1033
+ """Get page ID by title."""
1034
+ space_id = self.get_space_id()
1035
+ encoded_title = quote(title)
1036
+
1037
+ url = f"{self.config.base_url}/api/v2/pages?title={encoded_title}&space-id={space_id}&status=current"
1038
+ log_debug(f"Looking up page: {url}")
1039
+
1040
+ response = self._request("GET", url)
1041
+
1042
+ if response.status_code != 200:
1043
+ log_warn(f"Failed to look up page (HTTP {response.status_code})")
1044
+ return None
1045
+
1046
+ data = response.json()
1047
+ results = data.get("results", [])
1048
+
1049
+ if results:
1050
+ return results[0]["id"]
1051
+ return None
1052
+
1053
+ def get_page_version(self, page_id: str) -> int:
1054
+ """Get current version number of a page."""
1055
+ url = f"{self.config.base_url}/api/v2/pages/{page_id}"
1056
+ response = self._request("GET", url)
1057
+
1058
+ if response.status_code == 200:
1059
+ data = response.json()
1060
+ return data.get("version", {}).get("number", 1)
1061
+ return 1
1062
+
1063
+ def create_page(self, title: str, content: str, parent_id: str) -> Optional[str]:
1064
+ """Create a new Confluence page."""
1065
+ space_id = self.get_space_id()
1066
+
1067
+ payload = {
1068
+ "spaceId": space_id,
1069
+ "status": "current",
1070
+ "title": title,
1071
+ "parentId": parent_id,
1072
+ "body": {
1073
+ "representation": "storage",
1074
+ "value": content
1075
+ }
1076
+ }
1077
+
1078
+ log_debug(f"Create payload: spaceId={space_id}, title={title}, parentId={parent_id}")
1079
+
1080
+ url = f"{self.config.base_url}/api/v2/pages"
1081
+ response = self._request("POST", url, json=payload)
1082
+
1083
+ if response.status_code in (200, 201):
1084
+ data = response.json()
1085
+ return data.get("id")
1086
+ else:
1087
+ log_error(f"Failed to create page (HTTP {response.status_code})")
1088
+ try:
1089
+ log_error(f"Response: {response.json().get('message', response.text)}")
1090
+ except Exception:
1091
+ log_error(f"Response: {response.text}")
1092
+ return None
1093
+
1094
+ def update_page(self, page_id: str, title: str, content: str, version: int) -> bool:
1095
+ """Update an existing Confluence page."""
1096
+ payload = {
1097
+ "id": page_id,
1098
+ "status": "current",
1099
+ "title": title,
1100
+ "body": {
1101
+ "representation": "storage",
1102
+ "value": content
1103
+ },
1104
+ "version": {
1105
+ "number": version,
1106
+ "message": "Automated sync from repository"
1107
+ }
1108
+ }
1109
+
1110
+ url = f"{self.config.base_url}/api/v2/pages/{page_id}"
1111
+ response = self._request("PUT", url, json=payload)
1112
+
1113
+ if response.status_code == 200:
1114
+ return True
1115
+ else:
1116
+ log_error(f"Failed to update page (HTTP {response.status_code})")
1117
+ try:
1118
+ log_error(f"Response: {response.json().get('message', response.text)}")
1119
+ except Exception:
1120
+ log_error(f"Response: {response.text}")
1121
+ return False
1122
+
1123
+ def upload_attachment(self, page_id: str, file_path: Path, allowed_dir: Optional[Path] = None) -> Optional[str]:
1124
+ """Upload an attachment to a Confluence page.
1125
+
1126
+ Args:
1127
+ page_id: The Confluence page ID to attach to
1128
+ file_path: Path to the file to upload
1129
+ allowed_dir: If provided, file_path must be within this directory (security)
1130
+ """
1131
+ if not file_path.exists():
1132
+ log_warn(f" Attachment file not found: {file_path}")
1133
+ return None
1134
+
1135
+ # Security: validate file is within allowed directory if specified
1136
+ if allowed_dir is not None:
1137
+ try:
1138
+ file_path.resolve().relative_to(allowed_dir.resolve())
1139
+ except ValueError:
1140
+ log_warn(f" Blocked: file outside allowed directory: {file_path}")
1141
+ return None
1142
+
1143
+ filename = file_path.name
1144
+ log_debug(f" Uploading attachment: {filename}")
1145
+
1146
+ # Check if attachment already exists
1147
+ check_url = f"{self.config.base_url}/rest/api/content/{page_id}/child/attachment?filename={filename}"
1148
+ check_response = self._request("GET", check_url)
1149
+
1150
+ existing_id = None
1151
+ if check_response.status_code == 200:
1152
+ data = check_response.json()
1153
+ results = data.get("results", [])
1154
+ if results:
1155
+ existing_id = results[0]["id"]
1156
+
1157
+ # Prepare multipart upload
1158
+ headers = {"X-Atlassian-Token": "nocheck"}
1159
+ # Remove Content-Type for multipart
1160
+ upload_headers = {k: v for k, v in self.session.headers.items() if k.lower() != "content-type"}
1161
+ upload_headers.update(headers)
1162
+
1163
+ with open(file_path, "rb") as f:
1164
+ files = {"file": (filename, f)}
1165
+
1166
+ if existing_id:
1167
+ log_debug(f" Updating existing attachment: {existing_id}")
1168
+ url = f"{self.config.base_url}/rest/api/content/{page_id}/child/attachment/{existing_id}/data"
1169
+ else:
1170
+ log_debug(" Creating new attachment")
1171
+ url = f"{self.config.base_url}/rest/api/content/{page_id}/child/attachment"
1172
+
1173
+ # Use session auth but custom headers
1174
+ response = self.session.post(
1175
+ url, files=files, headers=upload_headers,
1176
+ timeout=self.config.timeout
1177
+ )
1178
+
1179
+ log_debug(f" HTTP status: {response.status_code}")
1180
+
1181
+ if response.status_code in (200, 201):
1182
+ data = response.json()
1183
+ results = data.get("results", [data])
1184
+ if results:
1185
+ att_filename = results[0].get("title", filename)
1186
+ log_debug(f" Uploaded: {att_filename}")
1187
+ return att_filename
1188
+
1189
+ log_warn(f" Failed to upload attachment: {filename} (HTTP {response.status_code})")
1190
+ return None
1191
+
1192
+ def set_page_labels(self, page_id: str, labels: List[str]) -> bool:
1193
+ """Set labels on a Confluence page.
1194
+
1195
+ This replaces all existing labels with the provided list.
1196
+ """
1197
+ if not labels:
1198
+ return True
1199
+
1200
+ log_debug(f" Setting labels: {labels}")
1201
+
1202
+ # Get existing labels first
1203
+ url = f"{self.config.base_url}/rest/api/content/{page_id}/label"
1204
+ response = self._request("GET", url)
1205
+
1206
+ existing_labels = []
1207
+ if response.status_code == 200:
1208
+ data = response.json()
1209
+ existing_labels = [lbl.get("name", "") for lbl in data.get("results", [])]
1210
+
1211
+ # Remove labels not in new list
1212
+ for old_label in existing_labels:
1213
+ if old_label not in labels:
1214
+ delete_url = f"{self.config.base_url}/rest/api/content/{page_id}/label/{old_label}"
1215
+ self._request("DELETE", delete_url)
1216
+ log_debug(f" Removed label: {old_label}")
1217
+
1218
+ # Add new labels
1219
+ labels_to_add = [{"prefix": "global", "name": lbl} for lbl in labels if lbl not in existing_labels]
1220
+ if labels_to_add:
1221
+ response = self._request("POST", url, json=labels_to_add)
1222
+ if response.status_code in (200, 201):
1223
+ log_debug(f" Added labels: {[lbl['name'] for lbl in labels_to_add]}")
1224
+ return True
1225
+ else:
1226
+ log_warn(f" Failed to set labels (HTTP {response.status_code})")
1227
+ return False
1228
+
1229
+ return True
1230
+
1231
+
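A hypothetical standalone use of the client, with placeholder credentials and titles:

    cfg = Config(base_url="https://example.atlassian.net/wiki",
                 space_key="DOCS", auth_mode="basic",
                 api_token="<token>", user_email="user@example.com")
    client = ConfluenceClient(cfg)
    page_id = client.get_page_id("Deployment Guide")   # None if no such page exists

get_page_id resolves the space key to a space ID on first use and caches it on the config.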
1232
+ # =============================================================================
1233
+ # IMAGE PROCESSING
1234
+ # =============================================================================
1235
+
1236
+ def extract_images(content: str) -> List[Tuple[str, str, str]]:
1237
+ """Extract image references from markdown.
1238
+
1239
+ Returns list of (full_match, alt_text, path) tuples.
1240
+ """
1241
+ pattern = r'(!\[([^\]]*)\]\(([^)]+)\))'
1242
+ return re.findall(pattern, content)
1243
+
1244
+
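For example (snippet hypothetical), each returned tuple is (full match, alt text, path):

    extract_images("See ![architecture](img/arch.png) for details.")
    # -> [('![architecture](img/arch.png)', 'architecture', 'img/arch.png')]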
1245
+ def process_images(content: str, md_file: Path, page_id: str,
1246
+ client: ConfluenceClient, config: Config) -> str:
1247
+ """Process markdown content: upload images and replace with Confluence macros."""
1248
+ md_dir = md_file.parent.resolve()
1249
+ images = extract_images(content)
1250
+
1251
+ if not images:
1252
+ return content
1253
+
1254
+ log_info(" Processing images...")
1255
+
1256
+ for full_match, alt_text, img_path in images:
1257
+ # Skip external URLs
1258
+ if img_path.startswith(('http://', 'https://')):
1259
+ log_debug(f" Skipping external image: {img_path}")
1260
+ continue
1261
+
1262
+ # Resolve relative path
1263
+ if img_path.startswith('/'):
1264
+ full_path = Path(img_path).resolve()
1265
+ else:
1266
+ full_path = (md_dir / img_path).resolve()
1267
+
1268
+ # Security: prevent path traversal outside markdown directory
1269
+ try:
1270
+ full_path.relative_to(md_dir)
1271
+ except ValueError:
1272
+ log_warn(f" Blocked path traversal attempt: {img_path}")
1273
+ content = handle_image_failure(content, full_match, alt_text, img_path, config, "blocked (path traversal)")
1274
+ continue
1275
+
1276
+ if full_path.exists():
1277
+ if config.dry_run:
1278
+ log_info(f" [DRY-RUN] Would upload: {full_path.name}")
1279
+ att_filename = full_path.name
1280
+ else:
1281
+ att_filename = client.upload_attachment(page_id, full_path, allowed_dir=md_dir)
1282
+
1283
+ if att_filename:
1284
+ # Replace markdown image with Confluence ac:image macro
1285
+ confluence_img = f'<ac:image ac:width="{config.max_image_width}" ac:alt="{alt_text}"><ri:attachment ri:filename="{att_filename}" /></ac:image>'
1286
+ content = content.replace(full_match, confluence_img)
1287
+ log_info(f" Replaced: {img_path} -> {att_filename}")
1288
+ else:
1289
+ content = handle_image_failure(content, full_match, alt_text, img_path, config, "upload failed")
1290
+ else:
1291
+ content = handle_image_failure(content, full_match, alt_text, img_path, config, "not found")
1292
+
1293
+ return content
1294
+
1295
+
1296
+ def handle_image_failure(content: str, full_match: str, alt_text: str,
1297
+ img_path: str, config: Config, reason: str) -> str:
1298
+ """Handle image processing failure based on config."""
1299
+ behavior = config.image_failure_behavior
1300
+
1301
+ if behavior == "placeholder":
1302
+ macro_type = "warning" if reason == "not found" else "info"
1303
+ placeholder = f'<ac:structured-macro ac:name="{macro_type}"><ac:rich-text-body><p>[Image {reason}: {alt_text or img_path}]</p></ac:rich-text-body></ac:structured-macro>'
1304
+ content = content.replace(full_match, placeholder)
1305
+ log_warn(f" Image {reason}, using placeholder: {img_path}")
1306
+ elif behavior == "skip":
1307
+ content = content.replace(full_match, "")
1308
+ log_warn(f" Image {reason}, removed: {img_path}")
1309
+ elif behavior == "fail":
1310
+ log_error(f" Image {reason}: {img_path}")
1311
+ raise RuntimeError(f"Image {reason}: {img_path}")
1312
+
1313
+ return content
1314
+
1315
+
1316
+ # =============================================================================
1317
+ # PAGE SYNC
1318
+ # =============================================================================
1319
+
1320
+ def sanitize_title(title: str, config: Config, behavior_override: Optional[str] = None) -> str:
1321
+ """Sanitize page title based on config or override."""
1322
+ behavior = behavior_override if behavior_override else config.title_special_chars
1323
+
1324
+ if behavior == "sanitize":
1325
+ title = re.sub(config.title_strip_pattern, '-', title)
1326
+ title = re.sub(r'--+', '-', title)
1327
+ title = title.strip('-')
1328
+ elif behavior == "encode":
1329
+ title = quote(title)
1330
+ elif behavior == "fail":
1331
+ if re.search(config.title_strip_pattern, title):
1332
+ raise ValueError(f"Title contains special characters: {title}")
1333
+
1334
+ return title
1335
+
1336
+
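A quick sketch of the default "sanitize" behavior with the built-in strip pattern (title value hypothetical):

    sanitize_title('API: Setup/Guide?', Config())   # -> 'API- Setup-Guide'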
1337
+ def sync_page(title: str, md_file: str, parent_id: str,
1338
+ client: ConfluenceClient, config: Config) -> bool:
1339
+ """Create or update a Confluence page.
1340
+
1341
+ Front-matter in the markdown file can override:
1342
+ - title: Page title
1343
+ - tags/labels: Page labels
1344
+ - parent_id/parent: Parent page ID
1345
+ - space_key/space: Target space
1346
+ - add_disclaimer/disclaimer: Whether to add disclaimer
1347
+ - max_image_width: Image width constraint
1348
+ - image_failure_behavior: How to handle image failures
1349
+ - title_special_chars: How to handle special chars in title
1350
+ - synchronized/sync: If false, skip this document
1351
+ """
1352
+ md_path = Path(md_file)
1353
+
1354
+ # Handle missing files
1355
+ if not md_path.exists():
1356
+ if config.missing_file_behavior == "skip":
1357
+ log_warn(f"File not found (skipping): {md_file}")
1358
+ return True
1359
+ else:
1360
+ log_error(f"File not found: {md_file}")
1361
+ return False
1362
+
1363
+ # Parse front-matter
1364
+ raw_content = md_path.read_text()
1365
+ doc_meta, content_without_fm = parse_front_matter(raw_content)
1366
+
1367
+ # Check if document should be synchronized
1368
+ if not doc_meta.synchronized:
1369
+ log_info(f"Skipping (synchronized: false): {md_file}")
1370
+ return True
1371
+
1372
+ # Front-matter overrides for title, parent_id
1373
+ effective_title = doc_meta.title if doc_meta.title else title
1374
+ effective_parent_id = doc_meta.parent_id if doc_meta.parent_id else parent_id
1375
+
1376
+ # Determine title_special_chars behavior (doc_meta overrides config)
1377
+ title_behavior = doc_meta.title_special_chars # May be None
1378
+
1379
+ # Sanitize title
1380
+ original_title = effective_title
1381
+ effective_title = sanitize_title(effective_title, config, title_behavior)
1382
+ if effective_title != original_title:
1383
+ log_debug(f"Title sanitized: '{original_title}' -> '{effective_title}'")
1384
+
1385
+ log_info(f"Syncing: {effective_title} from {md_file}")
1386
+ if doc_meta.tags:
1387
+ log_debug(f" Tags from front-matter: {doc_meta.tags}")
1388
+
1389
+ if config.dry_run:
1390
+ log_info(f" [DRY-RUN] Would sync to parent ID: {effective_parent_id}")
1391
+ if doc_meta.tags:
1392
+ log_info(f" [DRY-RUN] Would set labels: {doc_meta.tags}")
1393
+ return True
1394
+
1395
+ # Check if page exists
1396
+ page_id = client.get_page_id(effective_title)
1397
+
1398
+ if page_id:
1399
+ log_info(f" Page exists (ID: {page_id}), updating...")
1400
+
1401
+ # Process images first (needs page_id for uploads)
1402
+ content = process_images(content_without_fm, md_path, page_id, client, config)
1403
+ content = convert_markdown_to_confluence(content, md_file, config, doc_meta)
1404
+
1405
+ # Get current version
1406
+ version = client.get_page_version(page_id)
1407
+ log_debug(f" Current version: {version}")
1408
+
1409
+ # Update page
1410
+ if client.update_page(page_id, effective_title, content, version + 1):
1411
+ log_info(" Updated successfully")
1412
+ # Set labels if specified in front-matter
1413
+ if doc_meta.tags:
1414
+ client.set_page_labels(page_id, doc_meta.tags)
1415
+ return True
1416
+ else:
1417
+ return False
1418
+ else:
1419
+ log_info(" Page does not exist, creating...")
1420
+
1421
+ # Create page first (without images)
1422
+ initial_content = convert_markdown_to_confluence(content_without_fm, md_file, config, doc_meta)
1423
+
1424
+ new_page_id = client.create_page(effective_title, initial_content, effective_parent_id)
1425
+
1426
+ if new_page_id:
1427
+ log_info(f" Created successfully (ID: {new_page_id})")
1428
+
1429
+ # Set labels if specified in front-matter
1430
+ if doc_meta.tags:
1431
+ client.set_page_labels(new_page_id, doc_meta.tags)
1432
+
1433
+ # Now process images and update the page
1434
+ images = extract_images(content_without_fm)
1435
+
1436
+ if images:
1437
+ log_info(" Processing images for new page...")
1438
+ content = process_images(content_without_fm, md_path, new_page_id, client, config)
1439
+ content = convert_markdown_to_confluence(content, md_file, config, doc_meta)
1440
+
1441
+ # Update with images
1442
+ client.update_page(new_page_id, effective_title, content, 2)
1443
+ log_info(" Images processed")
1444
+
1445
+ return True
1446
+ else:
1447
+ return False
1448
+
1449
+
1450
+ def sync_from_config(client: ConfluenceClient, config: Config) -> bool:
1451
+ """Sync all documents from config."""
1452
+ if not config.documents:
1453
+ log_error("No document mappings found in config")
1454
+ log_error("Please define CONFLUENCE_DOC_* mappings in .confluence-sync.conf")
1455
+ log_error("")
1456
+ log_error("Example format:")
1457
+ log_error(' CONFLUENCE_DOC_MYPAGE="Page Title|path/to/file.md|parent_page_id"')
1458
+ return False
1459
+
1460
+ sync_errors = 0
1461
+ synced_count = 0
1462
+
1463
+ for doc_key, (title, path, parent_id) in config.documents.items():
1464
+ if not title or not path or not parent_id:
1465
+ log_warn(f"Invalid mapping for {doc_key}: missing title, path, or parent_id")
1466
+ continue
1467
+
1468
+ if sync_page(title, path, parent_id, client, config):
1469
+ synced_count += 1
1470
+ else:
+ sync_errors += 1
1471
+
1472
+ log_info(f"Synced {synced_count} documents with {sync_errors} error(s)")
1473
+
1474
+ return sync_errors == 0
1475
+
1476
+
1477
+ def list_documents(config: Config):
1478
+ """List configured documents."""
1479
+ if not config.documents:
1480
+ log_warn("No CONFLUENCE_DOC_* mappings found in config")
1481
+ log_info("Define mappings in .confluence-sync.conf like:")
1482
+ log_info(' CONFLUENCE_DOC_README="README|README.md|parent_page_id"')
1483
+ return False
1484
+
1485
+ log_info("Configured document mappings:")
1486
+ print()
1487
+ print(f"{'TITLE':<40} {'PATH':<50} {'PARENT_ID':<15} {'EXISTS'}")
1488
+ print(f"{'-----':<40} {'----':<50} {'---------':<15} {'------'}")
1489
+
1490
+ for doc_key, (title, path, parent_id) in config.documents.items():
1491
+ exists = "✓" if Path(path).exists() else "✗"
1492
+ print(f"{title:<40} {path:<50} {parent_id:<15} {exists}")
1493
+
1494
+ print()
1495
+ return True
1496
+
1497
+
1498
+ # =============================================================================
1499
+ # MAIN
1500
+ # =============================================================================
1501
+
1502
+ def check_env(config: Config) -> bool:
1503
+ """Check required configuration settings."""
1504
+ log_info("Checking required configuration...")
1505
+
1506
+ missing = []
1507
+
1508
+ # Check all required settings
1509
+ if not config.api_token:
1510
+ missing.append("CONFLUENCE_API_TOKEN")
1511
+
1512
+ if not config.base_url:
1513
+ missing.append("CONFLUENCE_BASE_URL")
1514
+
1515
+ if not config.space_key:
1516
+ missing.append("CONFLUENCE_SPACE_KEY")
1517
+
1518
+ if config.auth_mode == "basic" and not config.user_email:
1519
+ missing.append("CONFLUENCE_USER_EMAIL (required for basic auth)")
1520
+
1521
+ # Report all missing settings at once
1522
+ if missing:
1523
+ log_error("Missing required configuration:")
1524
+ for item in missing:
1525
+ log_error(f" - {item}")
1526
+ log_error("")
1527
+ log_error("Set these via environment variables or in .confluence-sync.conf")
1528
+ return False
1529
+
1530
+ # Log successful configuration
1531
+ log_info(f"Auth mode: {config.auth_mode}")
1532
+
1533
+ # Show token diagnostics (masked for security)
1534
+ token = config.api_token
1535
+ token_len = len(token)
1536
+ if token_len > 10:
1537
+ token_preview = f"{token[:4]}...{token[-4:]}"
1538
+ else:
1539
+ token_preview = "****"
1540
+ log_debug(f"CONFLUENCE_API_TOKEN: [{token_len} chars, {token_preview}]")
1541
+
1542
+ # Warn about potential token issues
1543
+ if token_len < 20:
1544
+ log_warn("API token seems too short - it may be truncated")
1545
+ if '\n' in token or '\r' in token:
1546
+ log_warn("API token contains newline characters - this may cause auth failures")
1547
+ if token.startswith('"') or token.endswith('"'):
1548
+ log_warn("API token has quote characters - check environment variable quoting")
1549
+
1550
+ if config.user_email:
1551
+ log_debug(f"CONFLUENCE_USER_EMAIL: {config.user_email}")
1552
+ log_debug(f"CONFLUENCE_BASE_URL: {config.base_url}")
1553
+ log_debug(f"CONFLUENCE_SPACE_KEY: {config.space_key}")
1554
+ log_debug(f"CONFLUENCE_TECH_PARENT_ID: {config.tech_parent_id or '(not set)'}")
1555
+ log_debug(f"CONFLUENCE_USER_PARENT_ID: {config.user_parent_id or '(not set)'}")
1556
+
1557
+ log_info("Configuration OK")
1558
+ return True
1559
+
1560
+
1561
+ def local_convert(output_dir: str, config_file: str, verbose: bool) -> int:
1562
+ """Convert configured markdown files to Confluence Storage Format locally.
1563
+
1564
+ Writes .csf files without making any API calls.
1565
+ """
1566
+ global _verbose
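+ # Temporarily override module-level verbosity; the finally block below restores the previous value.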
1567
+ previous_verbose = _verbose
1568
+ _verbose = verbose
1569
+
1570
+ try:
1571
+ output_path = Path(output_dir).resolve()
1572
+ output_path.mkdir(parents=True, exist_ok=True)
1573
+
1574
+ log_info(f"Local conversion mode - output directory: {output_path}")
1575
+
1576
+ # Load config (only need document mappings)
1577
+ config = load_config(config_file)
1578
+ config.verbose = verbose
1579
+
1580
+ if not config.documents:
1581
+ log_error("No document mappings found in config")
1582
+ return 1
1583
+
1584
+ converted = 0
1585
+ errors = 0
1586
+
1587
+ for doc_key, (title, md_file, parent_id) in config.documents.items():
1588
+ md_path = Path(md_file)
1589
+
1590
+ if not md_path.exists():
1591
+ if config.missing_file_behavior == "skip":
1592
+ log_warn(f"File not found (skipping): {md_file}")
1593
+ continue
1594
+ else:
1595
+ log_error(f"File not found: {md_file}")
1596
+ errors += 1
1597
+ continue
1598
+
1599
+ try:
1600
+ # Read and parse front-matter
1601
+ raw_content = md_path.read_text(encoding="utf-8")
1602
+ doc_meta, content_without_fm = parse_front_matter(raw_content)
1603
+
1604
+ # Check if document should be synchronized
1605
+ if not doc_meta.synchronized:
1606
+ log_info(f"Skipping (synchronized: false): {md_file}")
1607
+ continue
1608
+
1609
+ # Use front-matter title if available
1610
+ effective_title = doc_meta.title if doc_meta.title else title
1611
+
1612
+ # Convert to Confluence format
1613
+ csf_content = convert_markdown_to_confluence(content_without_fm, md_file, config, doc_meta)
1614
+
1615
+ # Write output file - sanitize filename to prevent path traversal
1616
+ safe_stem = re.sub(r'[^\w\-.]', '_', md_path.stem)
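+ # e.g. "release notes (v2).md" -> "release_notes__v2_.csf"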
1617
+ output_filename = safe_stem + ".csf"
1618
+ output_file = output_path / output_filename
1619
+
1620
+ # Security: ensure output file is within output directory
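+ # Note: Path.is_relative_to() used below requires Python 3.9 or newer.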
1621
+ if not output_file.resolve().is_relative_to(output_path):
1622
+ log_error(f"Security: output path traversal blocked for {md_file}")
1623
+ errors += 1
1624
+ continue
1625
+
1626
+ output_file.write_text(csf_content, encoding="utf-8")
1627
+
1628
+ log_info(f"Converted: {md_file} -> {output_file} (title: {effective_title})")
1629
+ if doc_meta.tags:
1630
+ log_debug(f" Tags: {doc_meta.tags}")
1631
+ converted += 1
1632
+
1633
+ except Exception as e:
1634
+ log_error(f"Failed to convert {md_file}: {e}")
1635
+ errors += 1
1636
+
1637
+ log_info("=== Local Conversion Complete ===")
1638
+ log_info(f"Converted: {converted}, Errors: {errors}")
1639
+
1640
+ return 0 if errors == 0 else 1
1641
+ finally:
1642
+ # Restore previous verbose state
1643
+ _verbose = previous_verbose
1644
+
1645
+
1646
+ def main():
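+ """Entry point. Exit codes: 0 = success, 1 = configuration or conversion problem, 3 = sync failure, 4 = parent page verification failure."""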
1647
+ global _verbose
1648
+
1649
+ parser = argparse.ArgumentParser(
1650
+ description="Sync documentation from repository to Confluence",
1651
+ formatter_class=argparse.RawDescriptionHelpFormatter,
1652
+ epilog="""
1653
+ Environment variables:
1654
+ CONFLUENCE_API_TOKEN API token for authentication (required)
1655
+ CONFLUENCE_USER_EMAIL User email for authentication (required for basic auth)
1656
+ CONFLUENCE_AUTH_MODE Auth mode: basic, bearer, or auto (default: auto)
1657
+ CONFLUENCE_BASE_URL Confluence base URL
1658
+ CONFLUENCE_SPACE_KEY Space key (e.g., ITS)
1659
+
1660
+ Edge case behaviors (set in config):
1661
+ CONFLUENCE_MISSING_FILE_BEHAVIOR skip|fail (default: skip)
1662
+ CONFLUENCE_IMAGE_FAILURE_BEHAVIOR placeholder|skip|fail (default: placeholder)
1663
+ CONFLUENCE_TITLE_SPECIAL_CHARS sanitize|encode|fail (default: sanitize)
1664
+ """
1665
+ )
1666
+
1667
+ parser.add_argument("--dry-run", action="store_true",
1668
+ help="Show what would be synced without making changes")
1669
+ parser.add_argument("--verbose", "-v", action="store_true",
1670
+ help="Enable verbose output")
1671
+ parser.add_argument("--config", default=".confluence-sync.conf",
1672
+ help="Config file path (default: .confluence-sync.conf)")
1673
+ parser.add_argument("--list", action="store_true",
1674
+ help="List configured documents and exit")
1675
+ parser.add_argument("--verify", action="store_true",
1676
+ help="Verify config and connectivity, don't sync")
1677
+ parser.add_argument("--single", nargs=3, metavar=("TITLE", "PATH", "PARENT_ID"),
1678
+ help="Sync a single document")
1679
+ parser.add_argument("--stdin", nargs="?", const="", metavar="FILENAME",
1680
+ help="Read markdown from stdin and convert (for md2confluence.py compatibility)")
1681
+ parser.add_argument("--local", nargs="?", const=".", metavar="OUTPUT_DIR",
1682
+ help="Convert markdown to Confluence Storage Format files locally without API calls. "
1683
+ "Optionally specify output directory (default: current directory)")
1684
+
1685
+ args = parser.parse_args()
1686
+
1687
+ _verbose = args.verbose
1688
+
1689
+ # Handle --stdin for backward compatibility with md2confluence.py
1690
+ if args.stdin is not None:
1691
+ content = sys.stdin.read()
1692
+ result = convert_markdown_to_confluence(content, args.stdin or "")
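+ # stdin mode returns before load_config(), so conversion presumably runs with the converter's built-in defaults.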
1693
+ print(result)
1694
+ return 0
1695
+
1696
+ # Handle --local mode (convert without API)
1697
+ if args.local is not None:
1698
+ return local_convert(args.local, args.config, args.verbose)
1699
+
1700
+ # Load configuration
1701
+ config = load_config(args.config)
1702
+ config.dry_run = args.dry_run
1703
+ config.verbose = args.verbose
1704
+
1705
+ # Handle --list option early (doesn't need auth)
1706
+ if args.list:
1707
+ return 0 if list_documents(config) else 1
1708
+
1709
+ log_info("Starting Confluence documentation sync...")
1710
+ log_info(f"Base URL: {config.base_url}")
1711
+ log_info(f"Space Key: {config.space_key}")
1712
+
1713
+ if config.dry_run:
1714
+ log_warn("DRY-RUN mode - no changes will be made")
1715
+ if config.verbose:
1716
+ log_info("Verbose mode enabled")
1717
+
1718
+ # Check environment
1719
+ if not check_env(config):
1720
+ return 1
1721
+
1722
+ # Create API client
1723
+ client = ConfluenceClient(config)
1724
+
1725
+ # Validate space exists
1726
+ log_info("Validating Confluence space...")
1727
+ client.get_space_id()
1728
+
1729
+ # Verify parent pages exist
1730
+ log_info("Verifying parent pages...")
1731
+ parent_errors = 0
1732
+
1733
+ if not client.verify_parent_page(config.tech_parent_id, "Technical Documentation"):
1734
+ parent_errors += 1
1735
+
1736
+ if not client.verify_parent_page(config.user_parent_id, "User Documentation"):
1737
+ parent_errors += 1
1738
+
1739
+ if parent_errors > 0:
1740
+ log_error("Parent page verification failed. Check page IDs in config.")
1741
+ return 4
1742
+
1743
+ # Handle --verify option
1744
+ if args.verify:
1745
+ log_info("Verification complete - all checks passed")
1746
+ return 0
1747
+
1748
+ # Handle --single option
1749
+ if args.single:
1750
+ title, path, parent_id = args.single
1751
+ log_info(f"Syncing single document: {title}")
1752
+ if sync_page(title, path, parent_id, client, config):
1753
+ log_info("=== Sync Complete ===")
1754
+ return 0
1755
+ else:
1756
+ return 3
1757
+
1758
+ # Sync from config
1759
+ if not sync_from_config(client, config):
1760
+ return 3
1761
+
1762
+ log_info("=== Sync Complete ===")
1763
+ log_info("All pages synced successfully")
1764
+ return 0
1765
+
1766
+
1767
+ if __name__ == "__main__":
1768
+ sys.exit(main())