deckbuilder 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deckbuilder/engine.py ADDED
@@ -0,0 +1,1546 @@
1
+ import json
2
+ import os
3
+ import re
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+ from pptx import Presentation
9
+ from pptx.dml.color import RGBColor
10
+ from pptx.enum.shapes import PP_PLACEHOLDER_TYPE
11
+ from pptx.util import Cm, Pt
12
+
13
+ from .placeholder_types import (
14
+ is_content_placeholder,
15
+ is_media_placeholder,
16
+ is_subtitle_placeholder,
17
+ is_title_placeholder,
18
+ )
19
+ from .image_handler import ImageHandler
20
+ from .placekitten_integration import PlaceKittenIntegration
21
+
22
+ try:
23
+ from .table_styles import TABLE_BORDER_STYLES, TABLE_HEADER_STYLES, TABLE_ROW_STYLES
24
+ except ImportError:
25
+ # Fallback values if modules don't exist
26
+ TABLE_HEADER_STYLES = {
27
+ "dark_blue_white_text": {"bg": RGBColor(46, 89, 132), "text": RGBColor(255, 255, 255)}
28
+ }
29
+ TABLE_ROW_STYLES = {
30
+ "alternating_light_gray": {
31
+ "primary": RGBColor(255, 255, 255),
32
+ "alt": RGBColor(240, 240, 240),
33
+ }
34
+ }
35
+ TABLE_BORDER_STYLES = {
36
+ "thin_gray": {"width": Pt(1), "color": RGBColor(128, 128, 128), "style": "all"}
37
+ }
38
+
39
+
40
def singleton(cls):
    """Class decorator that makes ``cls`` a lazily created singleton.

    The decorated name is rebound to a factory function. The first call
    constructs ``cls`` with the arguments supplied; every later call returns
    that same instance and ignores its arguments.

    For tests, both the factory and the class expose:

    * ``_instances`` -- the dict holding the cached instance, and
    * ``reset()`` -- empties that dict so the next call builds a new instance.

    Args:
        cls: The class to wrap.

    Returns:
        The factory function that replaces the class name.
    """
    instances = {}

    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    def reset():
        """Reset the singleton instance for testing purposes"""
        instances.clear()

    # Allow external access to clear instances for testing
    get_instance._instances = instances
    get_instance.reset = reset
    cls._instances = instances
    # Wrap in staticmethod: a plain function stored on a class is bound when
    # looked up through an instance, so ``instance.reset()`` would pass
    # ``self`` to this zero-argument function and raise TypeError.
    cls.reset = staticmethod(reset)

    return get_instance
59
+
60
+
61
+ @singleton
62
+ class Deckbuilder:
63
def __init__(self):
    """Read configuration from the environment and build the helper objects.

    Environment variables read: ``DECK_TEMPLATE_FOLDER``,
    ``DECK_TEMPLATE_NAME`` and ``DECK_OUTPUT_FOLDER``. Each attribute is
    ``None`` when its variable is unset.
    """
    env = os.environ
    self.template_path = env.get("DECK_TEMPLATE_FOLDER")
    self.template_name = env.get("DECK_TEMPLATE_NAME")
    self.output_folder = env.get("DECK_OUTPUT_FOLDER")
    self.prs = Presentation()
    self.layout_mapping = None

    # The image cache lives under the output folder when one is configured;
    # otherwise a relative default location is used.
    cache_dir = (
        os.path.join(self.output_folder, "tmp", "image_cache")
        if self.output_folder
        else "temp/image_cache"
    )
    self.image_handler = ImageHandler(cache_dir)
    self.placekitten = PlaceKittenIntegration(self.image_handler)

    # Make sure the configured (or default) template is present.
    self._check_template_exists(self.template_name or "default")
79
+
80
def _check_template_exists(self, templateName: str):
    """Check if template exists in the templates folder and copy if needed.

    When ``self.template_path`` is set, the folder is created if missing and
    two files are seeded from ``assets/templates`` (resolved relative to this
    module) when they are absent from the folder:

    * ``<templateName>.pptx`` -- always copied from the bundled
      ``default.pptx``, whatever the requested name is;
    * ``<base name>.json`` -- copied from the bundled file of the same name.

    File-system errors are swallowed so that setup can continue.

    Args:
        templateName: Template name with or without the ``.pptx`` suffix.
            An empty value or ``"default"`` falls back to
            ``self.template_name`` (or ``"default"`` when that is unset).
    """
    suffix = ".pptx"

    # Use self.template_name if available, otherwise use default
    if not templateName or templateName == "default":
        templateName = self.template_name or "default"

    # Ensure templateName ends with .pptx
    if not templateName.endswith(suffix):
        templateName += suffix

    if not self.template_path:
        return

    # Strip only the trailing extension. str.replace() would also remove a
    # ".pptx" that appears in the middle of the name.
    base_name = templateName[: -len(suffix)]
    assets_path = os.path.join(os.path.dirname(__file__), "..", "..", "assets", "templates")

    try:
        # Create templates folder if it doesn't exist
        os.makedirs(self.template_path, exist_ok=True)

        # Seed the template from assets/templates/default.pptx if missing
        default_template = os.path.join(self.template_path, templateName)
        if not os.path.exists(default_template):
            src_template = os.path.join(assets_path, "default.pptx")
            if os.path.exists(src_template):
                shutil.copy2(src_template, default_template)

        # Also copy the corresponding JSON mapping file
        json_template = os.path.join(self.template_path, base_name + ".json")
        if not os.path.exists(json_template):
            src_json = os.path.join(assets_path, base_name + ".json")
            if os.path.exists(src_json):
                shutil.copy2(src_json, json_template)
    except OSError:
        # Handle file operation errors silently
        pass  # nosec - Continue with setup if template copy fails
121
+
122
def _load_layout_mapping(self, templateName: str):
    """Load layout mapping from JSON file.

    Candidate files are tried in order: ``self.template_path`` (when set),
    then the directory containing this module. The first file that exists
    and parses as JSON is stored in ``self.layout_mapping``.

    If no candidate can be loaded -- missing or unreadable -- a minimal
    built-in mapping is stored instead, so ``self.layout_mapping`` is always
    set when this method returns.

    Args:
        templateName: Mapping name with or without the ``.json`` suffix.
    """
    if not templateName.endswith(".json"):
        templateName += ".json"

    # Template folder first, then the package directory.
    candidates = []
    if self.template_path:
        candidates.append(os.path.join(self.template_path, templateName))
    candidates.append(os.path.join(os.path.dirname(__file__), templateName))

    for mapping_path in candidates:
        if not os.path.exists(mapping_path):
            continue
        try:
            with open(mapping_path, "r", encoding="utf-8") as f:
                self.layout_mapping = json.load(f)
            return
        except Exception:
            # Best effort: an unreadable/corrupt mapping must not abort the
            # build. Try the next candidate, then the built-in fallback,
            # rather than leaving a missing or stale mapping behind.
            continue  # nosec - Continue if layout mapping fails to load

    # Use fallback mapping if JSON not found
    self.layout_mapping = {
        "layouts": {"Title and Content": {"index": 1}},
        "aliases": {"content": "Title and Content", "title": "Title Slide"},
    }
153
+
154
def _ensure_layout_mapping(self):
    """Load the layout mapping on first use.

    Does nothing when ``self.layout_mapping`` is already set; otherwise loads
    the mapping for ``self.template_name``, or ``"default"`` when no template
    name is configured.
    """
    if self.layout_mapping is not None:
        return
    self._load_layout_mapping(self.template_name or "default")
159
+
160
def create_presentation(
    self, templateName: str = "default", fileName: str = "Sample_Presentation"
) -> str:
    """Start a new presentation from a template.

    Ensures the template files exist, loads the matching layout mapping,
    opens the template (or a blank presentation when no template file is
    found) and removes any slides it contains.

    Args:
        templateName: Template name with or without the ``.pptx`` suffix.
        fileName: Name used only in the returned status message.

    Returns:
        A status message naming ``fileName``.
    """
    self._check_template_exists(templateName)

    has_suffix = templateName.endswith(".pptx")

    # The mapping is looked up by the name without the extension.
    mapping_name = templateName.replace(".pptx", "") if has_suffix else templateName
    self._load_layout_mapping(mapping_name)

    pptx_name = templateName if has_suffix else templateName + ".pptx"

    # Prefer the configured template folder, then the package directory.
    chosen = os.path.join(self.template_path, pptx_name) if self.template_path else None
    if not chosen or not os.path.exists(chosen):
        packaged = os.path.join(os.path.dirname(__file__), pptx_name)
        if os.path.exists(packaged):
            chosen = packaged

    # Open the template when one was found, otherwise start blank.
    if chosen and os.path.exists(chosen):
        self.prs = Presentation(chosen)
    else:
        self.prs = Presentation()

    self._clear_slides()

    return f"Creating presentation: {fileName}"
195
+
196
def write_presentation(self, fileName: str = "Sample_Presentation") -> str:
    """Save the current presentation under a timestamped file name.

    The file is written to ``self.output_folder`` (or the current directory
    when unset) as ``<fileName>.<YYYY-MM-DD_HHMM>.g.pptx``; the folder is
    created if needed. A file saved within the same minute is overwritten.

    Args:
        fileName: Base name of the generated file.

    Returns:
        A success message containing the generated file's base name.
    """
    from datetime import datetime

    target_dir = self.output_folder or "."
    os.makedirs(target_dir, exist_ok=True)

    # ".g.pptx" marks the file as generated.
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M")
    output_file = os.path.join(target_dir, f"{fileName}.{stamp}.g.pptx")

    self.prs.save(output_file)

    saved_name = os.path.basename(output_file)
    return f"Successfully created presentation: {saved_name}"
215
+
216
def add_slide_from_json(self, json_data) -> str:
    """
    Add one or more slides described by JSON data.

    Accepted shapes: ``{"slides": [...]}``, ``{"presentation": {"slides":
    [...]}}``, or a single slide dictionary.

    Args:
        json_data: A dictionary, or a JSON string (possibly JSON-encoded
            twice) that decodes to one.

    Returns:
        A success message, or an error message when parsing or slide
        creation fails. Errors are reported in the return value rather than
        raised.
    """
    try:
        data = json_data
        if isinstance(data, str):
            data = json.loads(data)
            # A double-encoded payload decodes to another string first.
            if isinstance(data, str):
                data = json.loads(data)

        # Normalise the three accepted shapes to a sequence of slides.
        if "slides" in data:
            slide_items = data["slides"]
        elif "presentation" in data and "slides" in data["presentation"]:
            slide_items = data["presentation"]["slides"]
        else:
            slide_items = [data]

        for slide_data in slide_items:
            self._add_slide(slide_data)

        return "Successfully added slide(s) from JSON data"

    except json.JSONDecodeError as e:
        return f"Error parsing JSON: {e!s}"
    except Exception as e:
        return f"Error adding slide: {e!s}"
258
+
259
def _clear_slides(self):
    """Remove every slide from ``self.prs`` and reset the slide counter."""
    slides = self.prs.slides
    # Walk from the last slide to the first so that deleting an entry never
    # shifts the positions still to be visited.
    for position in reversed(range(len(slides))):
        relationship_id = slides._sldIdLst[position].rId
        self.prs.part.drop_rel(relationship_id)
        del slides._sldIdLst[position]

    # Reset slide index for consistent image selection
    self._current_slide_index = 0
269
+
270
def _add_slide(self, slide_data: dict):
    """
    Append one slide built from ``slide_data``.

    The layout is taken from the ``"layout"`` key, else ``"type"``, else
    ``"content"``. With a layout mapping loaded, the value is used directly
    if it names a known layout and is otherwise resolved through the
    mapping's aliases; the layout index defaults to 1 when unknown.

    Args:
        slide_data: Dictionary containing slide information
    """
    # Track slide index for consistent image selection
    self._current_slide_index = getattr(self, "_current_slide_index", 0) + 1

    # Auto-parse JSON formatting for inline formatting support
    slide_data = self._auto_parse_json_formatting(slide_data)

    # An explicit "layout" wins over "type".
    requested = slide_data.get("layout", slide_data.get("type", "content"))

    # Defaults used when there is no mapping at all.
    layout_name = requested
    layout_index = 1

    mapping = self.layout_mapping
    if mapping:
        alias_table = mapping.get("aliases", {})
        known_layouts = mapping.get("layouts", {})
        if requested not in known_layouts:
            layout_name = alias_table.get(requested, requested)
        layout_index = known_layouts.get(layout_name, {}).get("index", 1)

    slide = self.prs.slides.add_slide(self.prs.slide_layouts[layout_index])

    # Copy descriptive placeholder names from template mapping
    self._copy_placeholder_names_from_mapping(slide, layout_name)

    # Add content to placeholders using template mapping + semantic detection
    self._apply_content_to_mapped_placeholders(slide, slide_data, layout_name)

    # Rich content takes precedence over simple content.
    if "rich_content" in slide_data:
        self._add_rich_content_to_slide(slide, slide_data["rich_content"])
    elif "content" in slide_data:
        self._add_simple_content_to_slide(slide, slide_data["content"])

    # Add table if provided
    if "table" in slide_data:
        self._add_table_to_slide(slide, slide_data["table"])
325
+
326
def _copy_placeholder_names_from_mapping(self, slide, layout_name):
    """
    Rename the slide's placeholders using the names in the layout mapping.

    The mapping's ``placeholders`` table for ``layout_name`` is keyed by
    placeholder index (as a string). Each placeholder whose index appears
    there is given the mapped name; the others are left alone. Nothing
    happens when no layout mapping is loaded.

    Args:
        slide: PowerPoint slide object
        layout_name: Name of the PowerPoint layout
    """
    mapping = self.layout_mapping
    if not mapping:
        return

    layout_info = mapping.get("layouts", {}).get(layout_name, {})
    names_by_index = layout_info.get("placeholders", {})

    for shape in slide.placeholders:
        index_key = str(shape.placeholder_format.idx)
        if index_key not in names_by_index:
            continue
        new_name = names_by_index[index_key]
        try:
            shape.element.nvSpPr.cNvPr.name = new_name
        except Exception:
            # Some placeholder types might not allow name changes
            pass  # nosec - Continue processing other placeholders
356
+
357
def _apply_content_to_mapped_placeholders(self, slide, slide_data, layout_name):
    """
    Route each field of ``slide_data`` to a placeholder on the slide.

    Placeholder selection per field name:

    * ``title`` / ``subtitle`` / ``content`` -- the first placeholder of the
      matching semantic type;
    * ``image_path``, names ending in ``.image_path``, or names containing
      "image" -- the first PICTURE placeholder;
    * anything else -- the placeholder whose index the layout mapping
      associates with that field name.

    The keys ``type``, ``rich_content``, ``table`` and ``layout`` are
    skipped. Fields with no matching placeholder are ignored. Without a
    layout mapping the fallback method is used instead.

    Args:
        slide: PowerPoint slide object
        slide_data: Dictionary containing slide content (from JSON or markdown)
        layout_name: Name of the PowerPoint layout
    """
    if not self.layout_mapping:
        # Fallback to basic semantic detection if no mapping available
        self._add_content_to_placeholders_fallback(slide, slide_data)
        return

    layout_info = self.layout_mapping.get("layouts", {}).get(layout_name, {})

    # Reverse the mapping: field_name -> placeholder index
    field_to_index = {
        mapped_name: int(index_text)
        for index_text, mapped_name in layout_info.get("placeholders", {}).items()
    }

    def first_placeholder(accepts):
        """Return the first slide placeholder accepted by ``accepts``, or None."""
        return next((shape for shape in slide.placeholders if accepts(shape)), None)

    for field_name, field_value in slide_data.items():
        # Skip non-content fields
        if field_name in ("type", "rich_content", "table", "layout"):
            continue

        if field_name == "title":
            target = first_placeholder(
                lambda shape: is_title_placeholder(shape.placeholder_format.type)
            )
        elif field_name == "subtitle":
            target = first_placeholder(
                lambda shape: is_subtitle_placeholder(shape.placeholder_format.type)
            )
        elif field_name == "content":
            target = first_placeholder(
                lambda shape: is_content_placeholder(shape.placeholder_format.type)
            )
        elif (
            field_name == "image_path"
            or field_name.endswith(".image_path")
            or "image" in field_name.lower()
        ):
            target = first_placeholder(
                lambda shape: shape.placeholder_format.type == PP_PLACEHOLDER_TYPE.PICTURE
            )
        elif field_name in field_to_index:
            wanted_index = field_to_index[field_name]
            target = first_placeholder(
                lambda shape: shape.placeholder_format.idx == wanted_index
            )
        else:
            target = None

        if target:
            # Apply content based on placeholder's semantic type
            self._apply_content_by_semantic_type(target, field_name, field_value, slide_data)

    # Process nested structures like media.image_path
    self._process_nested_image_fields(slide, slide_data)
446
+
447
def _add_content_to_placeholders_fallback(self, slide, slide_data):
    """
    Fill placeholders by semantic type alone, for when no layout mapping exists.

    For every placeholder on the slide: a title placeholder receives
    ``slide_data["title"]``, a subtitle placeholder ``slide_data["subtitle"]``
    and a content placeholder ``slide_data["content"]`` (the latter only when
    ``rich_content`` is absent). Text goes through the inline formatter
    (**bold**, *italic*, ___underline___).
    """

    def write_text(shape, value):
        """Write ``value`` with inline formatting, or as plain text if no text frame."""
        if not (hasattr(shape, "text_frame") and shape.text_frame):
            shape.text = value
            return
        frame = shape.text_frame
        frame.clear()
        first_paragraph = frame.paragraphs[0] if frame.paragraphs else frame.add_paragraph()
        self._apply_inline_formatting(value, first_paragraph)

    for shape in slide.placeholders:
        kind = shape.placeholder_format.type

        if "title" in slide_data and is_title_placeholder(kind):
            write_text(shape, slide_data["title"])

        elif "subtitle" in slide_data and is_subtitle_placeholder(kind):
            write_text(shape, slide_data["subtitle"])

        elif "content" in slide_data and is_content_placeholder(kind):
            # Simple content is only used when rich_content is not supplied.
            if "rich_content" not in slide_data:
                self._add_simple_content_to_placeholder(shape, slide_data["content"])
488
+
489
def _apply_content_by_semantic_type(self, placeholder, field_name, field_value, slide_data):
    """
    Write ``field_value`` into ``placeholder`` according to the placeholder's type.

    * title / subtitle: inline-formatted text (plain text if no text frame);
    * content: delegated to ``_add_simple_content_to_placeholder``;
    * media: PICTURE placeholders go to ``_handle_image_placeholder``; other
      media placeholders get inline-formatted text only if they have a text
      frame;
    * anything else: same handling as title / subtitle.

    Inline formatting means **bold**, *italic* and ___underline___ markers.
    """
    kind = placeholder.placeholder_format.type

    def write_text(plain_fallback):
        """Write the value as formatted text; optionally fall back to ``.text``."""
        if hasattr(placeholder, "text_frame") and placeholder.text_frame:
            frame = placeholder.text_frame
            frame.clear()
            first_paragraph = (
                frame.paragraphs[0] if frame.paragraphs else frame.add_paragraph()
            )
            self._apply_inline_formatting(str(field_value), first_paragraph)
        elif plain_fallback:
            placeholder.text = str(field_value)

    if is_title_placeholder(kind) or is_subtitle_placeholder(kind):
        write_text(True)

    elif is_content_placeholder(kind):
        # Content placeholders accept strings and lists; the helper decides.
        self._add_simple_content_to_placeholder(placeholder, field_value)

    elif is_media_placeholder(kind):
        if kind == PP_PLACEHOLDER_TYPE.PICTURE:
            self._handle_image_placeholder(placeholder, field_name, field_value, slide_data)
        else:
            # Other media types - text only, and only when a text frame exists
            write_text(False)

    else:
        write_text(True)
546
+
547
def _process_nested_image_fields(self, slide, slide_data):
    """
    Handle an image given as nested ``media.image_path`` in raw frontmatter.

    Returns without doing anything when ``slide_data`` already has a
    flattened image field -- a key equal to ``image_path``, or a key ending
    in ``_1`` that contains "image" -- because such fields are handled by the
    main field loop. Otherwise, when ``slide_data["media"]`` is a dict with a
    truthy ``image_path``, that path is applied to the first PICTURE
    placeholder on the slide.

    Args:
        slide: PowerPoint slide object
        slide_data: Dictionary containing slide content
    """
    # Flattened image fields mean the conversion step already ran.
    for key in slide_data.keys():
        if key == "image_path" or (key.endswith("_1") and "image" in key.lower()):
            return

    if "media" not in slide_data or not isinstance(slide_data["media"], dict):
        return

    image_path = slide_data["media"].get("image_path")
    if not image_path:
        return

    # Only the first PICTURE placeholder receives the image.
    for shape in slide.placeholders:
        if shape.placeholder_format.type == PP_PLACEHOLDER_TYPE.PICTURE:
            self._handle_image_placeholder(shape, "media.image_path", image_path, slide_data)
            break
585
+
586
def _add_simple_content_to_placeholder(self, placeholder, content):
    """Replace a placeholder's text with ``content``, applying inline formatting.

    A string becomes a single paragraph; a list becomes one paragraph per
    item. Any other type just leaves the text frame cleared. Placeholders
    without a ``text_frame`` attribute are left untouched.
    """
    if not hasattr(placeholder, "text_frame"):
        return

    frame = placeholder.text_frame
    frame.clear()

    if isinstance(content, str):
        lines = [content]
    elif isinstance(content, list):
        lines = content
    else:
        return

    for position, line in enumerate(lines):
        # The first line reuses the frame's existing first paragraph.
        paragraph = frame.add_paragraph() if position else frame.paragraphs[0]
        self._apply_inline_formatting(line, paragraph)
604
+
605
def _parse_inline_formatting(self, text):
    """Split ``text`` into segments annotated with bold/italic/underline flags.

    Recognised markers: ``***___x___***`` and ``___***x***___`` (bold, italic
    and underline), ``***x***`` (bold and italic), ``___x___`` (underline),
    ``**x**`` (bold) and ``*x*`` (italic). Markers are removed from the
    output text.

    Returns:
        A list of ``{"text": str, "format": dict}`` items in reading order.
        Empty or ``None`` input yields one empty, unformatted segment.
    """
    import re

    if not text:
        return [{"text": "", "format": {}}]

    # Order matters: when two markers start at the same position, the one
    # listed first wins, so longer markers come before their prefixes.
    markup = (
        (r"\*\*\*___(.*?)___\*\*\*", {"bold": True, "italic": True, "underline": True}),
        (r"___\*\*\*(.*?)\*\*\*___", {"bold": True, "italic": True, "underline": True}),
        (r"\*\*\*(.*?)\*\*\*", {"bold": True, "italic": True}),
        (r"___(.*?)___", {"underline": True}),
        (r"\*\*(.*?)\*\*", {"bold": True}),
        (r"\*(.*?)\*", {"italic": True}),
    )

    # Collect every hit, then order by start position. The sort is stable,
    # which preserves the precedence order above for equal starts.
    hits = sorted(
        (
            (found.start(), found.end(), found.group(1), style)
            for pattern, style in markup
            for found in re.finditer(pattern, text)
        ),
        key=lambda hit: hit[0],
    )

    segments = []
    cursor = 0  # end of the last accepted hit
    for start, end, inner, style in hits:
        if start < cursor:
            # Overlaps a hit that was already accepted.
            continue
        if start > cursor:
            segments.append({"text": text[cursor:start], "format": {}})
        segments.append({"text": inner, "format": style})
        cursor = end

    # Trailing plain text after the last marker.
    if cursor < len(text):
        segments.append({"text": text[cursor:], "format": {}})

    return segments or [{"text": text, "format": {}}]
671
+
672
def _apply_inline_formatting(self, text, paragraph):
    """Replace the paragraph's text with runs built from ``text``'s inline markers.

    The paragraph is emptied first; then one run is added per parsed
    segment, with bold / italic / underline switched on where the segment's
    format requests it.
    """
    # Clear any existing text
    paragraph.text = ""

    for piece in self._parse_inline_formatting(text):
        run = paragraph.add_run()
        run.text = piece["text"]

        requested = piece["format"]
        # Only flags that are requested are set; others keep their default.
        for flag in ("bold", "italic", "underline"):
            if requested.get(flag):
                setattr(run.font, flag, True)
693
+
694
def _apply_formatted_segments_to_shape(self, shape, segments):
    """Write pre-parsed formatted segments into a shape.

    With a ``text_frame`` attribute, the frame is cleared and one run per
    segment is added to its first paragraph. Without one, the segment texts
    are concatenated and assigned to ``shape.text`` (formatting is lost).
    """
    if not hasattr(shape, "text_frame"):
        shape.text = "".join(piece["text"] for piece in segments)
        return

    frame = shape.text_frame
    frame.clear()

    # Reuse the first paragraph, creating one only if the frame has none.
    paragraph = frame.paragraphs[0] if frame.paragraphs else frame.add_paragraph()
    paragraph.text = ""

    for piece in segments:
        run = paragraph.add_run()
        run.text = piece["text"]

        requested = piece["format"]
        for flag in ("bold", "italic", "underline"):
            if requested.get(flag):
                setattr(run.font, flag, True)
725
+
726
def _apply_formatted_segments_to_cell(self, cell, segments):
    """Write pre-parsed formatted segments into a table cell.

    The cell's text frame is cleared and one run per segment is added to its
    first paragraph, with bold / italic / underline applied as requested.
    """
    frame = cell.text_frame
    frame.clear()

    # Everything goes into the frame's first paragraph.
    paragraph = frame.paragraphs[0]
    paragraph.text = ""

    for piece in segments:
        run = paragraph.add_run()
        run.text = piece["text"]

        requested = piece["format"]
        for flag in ("bold", "italic", "underline"):
            if requested.get(flag):
                setattr(run.font, flag, True)
748
+
749
def _add_rich_content_to_slide(self, slide, rich_content: list):
    """Render rich content blocks into the placeholder with index 1.

    Each block is a dict carrying one of ``"heading"``, ``"paragraph"`` or
    ``"bullets"`` (with optional ``"bullet_levels"``). Headings are made
    bold; bullets are indented by their level (default 1). Returns without
    changes when the slide has no placeholder with index 1, and prints a
    warning when that placeholder has no text frame.
    """
    target = next(
        (shape for shape in slide.placeholders if shape.placeholder_format.idx == 1),
        None,
    )
    if not target:
        return

    # Skip if this placeholder has been converted to an image placeholder
    if not hasattr(target, "text_frame") or target.text_frame is None:
        print(
            f"Warning: Skipping rich content for placeholder "
            f"{target.placeholder_format.idx} - converted to image placeholder"
        )
        return

    frame = target.text_frame
    frame.clear()
    frame.word_wrap = True

    # Uniform margins on all four sides.
    for side in ("margin_left", "margin_right", "margin_top", "margin_bottom"):
        setattr(frame, side, Cm(0.25))

    # The very first piece of content reuses the frame's existing paragraph;
    # everything after it gets a new paragraph.
    is_first = True
    for item in rich_content:
        if "heading" in item:
            para = frame.paragraphs[0] if is_first else frame.add_paragraph()
            self._apply_inline_formatting(item["heading"], para)
            # Headings are bold regardless of inline markers.
            for run in para.runs:
                run.font.bold = True
            para.space_after = Pt(6)
            para.space_before = Pt(0) if is_first else Pt(12)

        elif "paragraph" in item:
            para = frame.paragraphs[0] if is_first else frame.add_paragraph()
            self._apply_inline_formatting(item["paragraph"], para)
            para.space_after = Pt(6)
            para.space_before = Pt(3)

        elif "bullets" in item:
            bullets = item["bullets"]
            # Levels default to 1 for every bullet when not supplied.
            levels = item.get("bullet_levels", [1] * len(bullets))

            for position, bullet in enumerate(bullets):
                if is_first and position == 0:
                    para = frame.paragraphs[0]
                else:
                    para = frame.add_paragraph()
                self._apply_inline_formatting(bullet, para)

                level = levels[position] if position < len(levels) else 1
                para.level = level

                # Sub-bullets (level 2+) are spaced slightly tighter.
                para.space_after = Pt(3) if level == 1 else Pt(2)

        is_first = False
830
+
831
def _add_simple_content_to_slide(self, slide, content):
    """Write plain content into the first usable placeholder with index 1.

    A string becomes one paragraph and a list one paragraph per item, each
    passed through the inline formatter. A placeholder with index 1 but no
    text frame triggers a printed warning and the search continues; the
    first one that has a text frame is filled and the search stops.
    Kept for backwards compatibility with the simple ``content`` field.
    """
    for shape in slide.placeholders:
        if shape.placeholder_format.idx != 1:
            continue

        # Skip if this placeholder has been converted to an image placeholder
        if not hasattr(shape, "text_frame") or shape.text_frame is None:
            print(
                f"Warning: Skipping content for placeholder "
                f"{shape.placeholder_format.idx} - converted to image placeholder"
            )
            continue

        frame = shape.text_frame
        frame.clear()

        if isinstance(content, str):
            lines = [content]
        elif isinstance(content, list):
            lines = content
        else:
            lines = []

        for position, line in enumerate(lines):
            # The first line reuses the frame's existing first paragraph.
            paragraph = frame.add_paragraph() if position else frame.paragraphs[0]
            self._apply_inline_formatting(line, paragraph)
        break
857
+
858
+ def _add_table_to_slide(self, slide, table_data):
859
+ """
860
+ Add a styled table to a slide.
861
+
862
+ Args:
863
+ slide: The slide to add the table to
864
+ table_data: Dictionary containing table data and styling options
865
+ """
866
+ # Get table data - support both 'data' and 'rows' keys for backwards compatibility
867
+ data = table_data.get("data", table_data.get("rows", []))
868
+ if not data:
869
+ return
870
+
871
+ # Get styling options
872
+ header_style = table_data.get("header_style", "dark_blue_white_text")
873
+ row_style = table_data.get("row_style", "alternating_light_gray")
874
+ border_style = table_data.get("border_style", "thin_gray")
875
+ custom_colors = table_data.get("custom_colors", {})
876
+
877
+ # Find content placeholder or create table in available space
878
+ content_placeholder = None
879
+ for shape in slide.placeholders:
880
+ if shape.placeholder_format.idx == 1: # Content placeholder
881
+ content_placeholder = shape
882
+ break
883
+
884
+ if content_placeholder:
885
+ # Remove placeholder and create table in its place
886
+ left = content_placeholder.left
887
+ top = content_placeholder.top
888
+ width = content_placeholder.width
889
+ height = content_placeholder.height
890
+
891
+ # Remove the placeholder
892
+ sp = content_placeholder._element
893
+ sp.getparent().remove(sp)
894
+ else:
895
+ # Default positioning if no placeholder found
896
+ left = Cm(2.5)
897
+ top = Cm(5)
898
+ width = Cm(20)
899
+ height = Cm(12)
900
+
901
+ # Create the table
902
+ rows = len(data)
903
+ if data:
904
+ # Handle both old (list of strings) and new (list of dicts) formats
905
+ first_row = data[0]
906
+ if isinstance(first_row, list):
907
+ cols = len(first_row)
908
+ else:
909
+ cols = 1 # Fallback
910
+ else:
911
+ cols = 1
912
+
913
+ table = slide.shapes.add_table(rows, cols, left, top, width, height).table
914
+
915
+ # Apply table data with formatting support
916
+ for row_idx, row_data in enumerate(data):
917
+ for col_idx, cell_data in enumerate(row_data):
918
+ cell = table.cell(row_idx, col_idx)
919
+
920
+ # Handle both old (string) and new (formatted) cell data
921
+ if isinstance(cell_data, dict) and "formatted" in cell_data:
922
+ # New formatted cell data
923
+ self._apply_formatted_segments_to_cell(cell, cell_data["formatted"])
924
+ else:
925
+ # Old string cell data
926
+ cell.text = str(cell_data)
927
+
928
+ # Apply styling
929
+ self._apply_table_styling(table, header_style, row_style, border_style, custom_colors)
930
+
931
def _apply_table_styling(self, table, header_style, row_style, border_style, custom_colors):
    """
    Apply header, row and border styling to a table.

    Style names that are not present in the corresponding style table are ignored.

    Args:
        table: The table object to style
        header_style: Header style name (key of TABLE_HEADER_STYLES)
        row_style: Row style name (key of TABLE_ROW_STYLES)
        border_style: Border style name (key of TABLE_BORDER_STYLES)
        custom_colors: Dictionary of custom color overrides; the keys read here are
            "header_bg", "header_text", "primary_row" and "alt_row"
    """
    # Header row (first row): background fill plus bold, coloured text
    if header_style in TABLE_HEADER_STYLES:
        header_palette = TABLE_HEADER_STYLES[header_style]

        # A valid custom colour wins over the named style
        header_fill = (
            self._parse_custom_color(custom_colors.get("header_bg")) or header_palette["bg"]
        )
        header_font = (
            self._parse_custom_color(custom_colors.get("header_text")) or header_palette["text"]
        )

        for column in range(len(table.columns)):
            header_cell = table.cell(0, column)
            header_cell.fill.solid()
            header_cell.fill.fore_color.rgb = header_fill

            for paragraph in header_cell.text_frame.paragraphs:
                for run in paragraph.runs:
                    run.font.color.rgb = header_font
                    run.font.bold = True

    # Data rows: only when there is something below the header
    if row_style in TABLE_ROW_STYLES and len(table.rows) > 1:
        row_palette = TABLE_ROW_STYLES[row_style]

        primary_fill = (
            self._parse_custom_color(custom_colors.get("primary_row")) or row_palette["primary"]
        )
        alternate_fill = (
            self._parse_custom_color(custom_colors.get("alt_row")) or row_palette["alt"]
        )

        for row in range(1, len(table.rows)):
            # First data row is primary, second alternate, and so on
            row_fill = alternate_fill if row % 2 == 0 else primary_fill
            if row_fill is None:
                continue

            for column in range(len(table.columns)):
                body_cell = table.cell(row, column)
                body_cell.fill.solid()
                body_cell.fill.fore_color.rgb = row_fill

    # Borders
    if border_style in TABLE_BORDER_STYLES:
        self._apply_table_borders(table, TABLE_BORDER_STYLES[border_style], custom_colors)
991
+
992
+ def _apply_table_borders(self, table, border_config, custom_colors):
993
+ """
994
+ Apply border styling to a table.
995
+
996
+ Args:
997
+ table: The table object
998
+ border_config: Border configuration dictionary
999
+ custom_colors: Custom color overrides
1000
+ """
1001
+ border_width = border_config["width"]
1002
+ border_color = (
1003
+ self._parse_custom_color(custom_colors.get("border_color")) or border_config["color"]
1004
+ )
1005
+ border_style = border_config["style"]
1006
+
1007
+ if border_style == "none" or border_width.cm == 0:
1008
+ return
1009
+
1010
+ # Apply borders based on style
1011
+ for row_idx in range(len(table.rows)):
1012
+ for col_idx in range(len(table.columns)):
1013
+ cell = table.cell(row_idx, col_idx)
1014
+
1015
+ if border_style == "all":
1016
+ # All borders
1017
+ self._set_cell_borders(cell, border_width, border_color, all_sides=True)
1018
+ elif border_style == "header" and row_idx == 0:
1019
+ # Only header bottom border
1020
+ self._set_cell_borders(cell, border_width, border_color, bottom=True)
1021
+ elif border_style == "outer":
1022
+ # Only outer borders
1023
+ is_top = row_idx == 0
1024
+ is_bottom = row_idx == len(table.rows) - 1
1025
+ is_left = col_idx == 0
1026
+ is_right = col_idx == len(table.columns) - 1
1027
+
1028
+ self._set_cell_borders(
1029
+ cell,
1030
+ border_width,
1031
+ border_color,
1032
+ top=is_top,
1033
+ bottom=is_bottom,
1034
+ left=is_left,
1035
+ right=is_right,
1036
+ )
1037
+
1038
+ def _set_cell_borders(
1039
+ self, cell, width, color, all_sides=False, top=False, bottom=False, left=False, right=False
1040
+ ):
1041
+ """
1042
+ Set borders for a table cell.
1043
+
1044
+ Args:
1045
+ cell: The table cell
1046
+ width: Border width
1047
+ color: Border color
1048
+ all_sides: Apply to all sides
1049
+ top, bottom, left, right: Apply to specific sides
1050
+ """
1051
+ if color is None:
1052
+ return
1053
+
1054
+ if all_sides:
1055
+ top = bottom = left = right = True
1056
+
1057
+ # Note: python-pptx has limited border support
1058
+ # This is a simplified implementation
1059
+ try:
1060
+ if hasattr(cell, "border"):
1061
+ if top and hasattr(cell.border, "top"):
1062
+ cell.border.top.color.rgb = color
1063
+ cell.border.top.width = width
1064
+ if bottom and hasattr(cell.border, "bottom"):
1065
+ cell.border.bottom.color.rgb = color
1066
+ cell.border.bottom.width = width
1067
+ if left and hasattr(cell.border, "left"):
1068
+ cell.border.left.color.rgb = color
1069
+ cell.border.left.width = width
1070
+ if right and hasattr(cell.border, "right"):
1071
+ cell.border.right.color.rgb = color
1072
+ cell.border.right.width = width
1073
+ except Exception:
1074
+ # Borders not fully supported in python-pptx, skip silently
1075
+ return # nosec - Skip border styling if not supported
1076
+
1077
+ def _parse_custom_color(self, color_value):
1078
+ """
1079
+ Parse a custom color value (hex string) to RGBColor.
1080
+
1081
+ Args:
1082
+ color_value: Hex color string (e.g., "#FF0000")
1083
+
1084
+ Returns:
1085
+ RGBColor object or None if invalid
1086
+ """
1087
+ if not color_value or not isinstance(color_value, str):
1088
+ return None
1089
+
1090
+ try:
1091
+ # Remove # if present
1092
+ color_value = color_value.lstrip("#")
1093
+
1094
+ # Convert hex to RGB
1095
+ if len(color_value) == 6:
1096
+ r = int(color_value[0:2], 16)
1097
+ g = int(color_value[2:4], 16)
1098
+ b = int(color_value[4:6], 16)
1099
+ return RGBColor(r, g, b)
1100
+ except (ValueError, TypeError):
1101
+ pass
1102
+
1103
+ return None
1104
+
1105
def parse_markdown_with_frontmatter(self, markdown_content: str) -> list:
    """
    Parse markdown content with frontmatter into slide data.

    The text is split on lines consisting only of ``---``. Non-empty blocks are then
    consumed in (frontmatter, content) pairs; a block with nothing after it is
    treated as content without frontmatter.

    Args:
        markdown_content: Markdown string with frontmatter slide definitions

    Returns:
        List of slide dictionaries ready for _add_slide()
    """
    # Ensure layout mapping is loaded
    self._ensure_layout_mapping()

    # Split content by frontmatter boundaries
    blocks = re.split(r"^---\s*$", markdown_content, flags=re.MULTILINE)
    block_count = len(blocks)

    slides = []
    cursor = 0
    while cursor < block_count:
        current = blocks[cursor].strip()

        # Skip empty blocks
        if not current:
            cursor += 1
            continue

        paired = False
        if cursor + 1 < block_count:
            # Treat this block as frontmatter and the next one as its content
            try:
                following = blocks[cursor + 1].strip()
                slide_config = self._parse_structured_frontmatter(current)
                slides.append(self._parse_slide_content(following, slide_config))
                paired = True
            except yaml.YAMLError:
                # If YAML parsing fails, treat as regular content below
                paired = False

        if paired:
            cursor += 2  # Skip both frontmatter and content blocks
            continue

        # Single block without (usable) frontmatter
        slides.append(self._parse_slide_content(current, {}))
        cursor += 1

    return slides
1158
+
1159
def _parse_structured_frontmatter(self, frontmatter_content: str) -> dict:
    """
    Parse structured frontmatter and convert to placeholder mappings.

    Frontmatter that YAML cannot parse, or that does not parse to a mapping, goes
    through the line-based fallback parser. Frontmatter without a "layout", or with
    a layout that has no structured support, is returned as parsed.
    """
    from .structured_frontmatter import (
        StructuredFrontmatterConverter,
        StructuredFrontmatterValidator,
    )

    try:
        parsed = yaml.safe_load(frontmatter_content)
    except yaml.YAMLError:
        # Special characters broke YAML; the non-dict check below routes this
        # to the safe parser
        parsed = None

    # Anything other than a mapping (e.g. a bare string from malformed YAML)
    if not isinstance(parsed, dict):
        return self._parse_frontmatter_safe(frontmatter_content)

    layout_name = parsed.get("layout")
    if not layout_name:
        return parsed

    # Ensure layout mapping is loaded for structured frontmatter conversion
    self._ensure_layout_mapping()
    converter = StructuredFrontmatterConverter(self.layout_mapping)

    # Regular frontmatter processing
    if not converter.registry.supports_structured_frontmatter(layout_name):
        return parsed

    # Validate structured frontmatter; problems are reported but do not stop processing
    report = StructuredFrontmatterValidator().validate_structured_frontmatter(
        parsed, layout_name
    )
    if not report["valid"]:
        for error in report["errors"]:
            print(f"Error in structured frontmatter: {error}")
        for warning in report["warnings"]:
            print(f"Warning in structured frontmatter: {warning}")

    # Convert to placeholder mappings
    return converter.convert_structured_to_placeholders(parsed)
1203
+
1204
+ def _parse_frontmatter_safe(self, frontmatter_raw: str) -> dict:
1205
+ """
1206
+ Parse frontmatter safely by handling special characters that break YAML.
1207
+
1208
+ This method processes frontmatter line by line to handle values with
1209
+ markdown formatting characters (*, _, etc.) that would break YAML parsing.
1210
+ """
1211
+ config = {}
1212
+ for line in frontmatter_raw.split("\n"):
1213
+ line = line.strip()
1214
+ if not line or line.startswith("#"):
1215
+ continue
1216
+
1217
+ if ":" in line:
1218
+ key, value = line.split(":", 1)
1219
+ key = key.strip()
1220
+ value = value.strip()
1221
+
1222
+ # Remove quotes if present
1223
+ if value.startswith('"') and value.endswith('"'):
1224
+ value = value[1:-1]
1225
+ elif value.startswith("'") and value.endswith("'"):
1226
+ value = value[1:-1]
1227
+
1228
+ config[key] = value
1229
+
1230
+ return config
1231
+
1232
+ def _parse_slide_content(self, content: str, config: dict) -> dict:
1233
+ """Convert markdown content + config into slide data dict with mixed content support"""
1234
+ slide_data = {
1235
+ "type": config.get("layout", "content"),
1236
+ **config, # Include all frontmatter as slide properties
1237
+ }
1238
+
1239
+ if not content.strip():
1240
+ return slide_data
1241
+
1242
+ lines = content.split("\n")
1243
+
1244
+ # Extract title (first # header)
1245
+ title_found = False
1246
+ content_lines = []
1247
+
1248
+ for line in lines:
1249
+ if line.startswith("# ") and not title_found:
1250
+ title_text = line[2:].strip()
1251
+ slide_data["title"] = title_text
1252
+ slide_data["title_formatted"] = self._parse_inline_formatting(title_text)
1253
+ title_found = True
1254
+ elif line.startswith("## ") and slide_data["type"] == "title":
1255
+ subtitle_text = line[3:].strip()
1256
+ slide_data["subtitle"] = subtitle_text
1257
+ slide_data["subtitle_formatted"] = self._parse_inline_formatting(subtitle_text)
1258
+ else:
1259
+ content_lines.append(line)
1260
+
1261
+ # Parse mixed content based on slide type
1262
+ if slide_data["type"] == "table":
1263
+ slide_data["table"] = self._parse_markdown_table("\n".join(content_lines), config)
1264
+ elif slide_data["type"] != "title": # Content slides get rich content
1265
+ rich_content = self._parse_rich_content("\n".join(content_lines))
1266
+ if rich_content:
1267
+ slide_data["rich_content"] = rich_content
1268
+
1269
+ return slide_data
1270
+
1271
+ def _parse_rich_content(self, content: str) -> list:
1272
+ """Parse mixed markdown content into structured content blocks with better hierarchy"""
1273
+ blocks = []
1274
+ lines = content.split("\n")
1275
+ current_block = None
1276
+
1277
+ for line in lines:
1278
+ original_line = line
1279
+ line = line.strip()
1280
+ if not line:
1281
+ continue
1282
+
1283
+ # Handle nested bullet points by preserving indentation
1284
+ if line.startswith("- ") or line.startswith("* "):
1285
+ # Determine indentation level
1286
+ indent_level = len(original_line) - len(original_line.lstrip())
1287
+ bullet_text = line[2:].strip()
1288
+
1289
+ if not current_block or "bullets" not in current_block:
1290
+ if current_block:
1291
+ blocks.append(current_block)
1292
+ current_block = {"bullets": [], "bullet_levels": []}
1293
+
1294
+ current_block["bullets"].append(bullet_text)
1295
+ # Map indentation to bullet levels (0 indent = level 1, 2+ spaces = level 2, etc.)
1296
+ level = 1 if indent_level < 2 else 2
1297
+ current_block["bullet_levels"].append(level)
1298
+
1299
+ elif line.startswith("## "): # Subheading
1300
+ if current_block:
1301
+ blocks.append(current_block)
1302
+ current_block = {"heading": line[3:].strip(), "level": 2}
1303
+
1304
+ elif line.startswith("### "): # Sub-subheading
1305
+ if current_block:
1306
+ blocks.append(current_block)
1307
+ current_block = {"heading": line[4:].strip(), "level": 3}
1308
+
1309
+ else: # Regular paragraph
1310
+ if not current_block or "paragraph" not in current_block:
1311
+ if current_block:
1312
+ blocks.append(current_block)
1313
+ current_block = {"paragraph": line}
1314
+ else:
1315
+ current_block["paragraph"] += " " + line
1316
+
1317
+ if current_block:
1318
+ blocks.append(current_block)
1319
+
1320
+ return blocks
1321
+
1322
+ def _parse_markdown_table(self, content: str, config: dict) -> dict:
1323
+ """Extract table from markdown and apply styling config"""
1324
+ table_data = {
1325
+ "data": [],
1326
+ "header_style": config.get("style", "dark_blue_white_text"),
1327
+ "row_style": config.get("row_style", "alternating_light_gray"),
1328
+ "border_style": config.get("border_style", "thin_gray"),
1329
+ "custom_colors": config.get("custom_colors", {}),
1330
+ }
1331
+
1332
+ lines = [line.strip() for line in content.split("\n") if line.strip()]
1333
+
1334
+ for line in lines:
1335
+ if line.startswith("|") and line.endswith("|"):
1336
+ # Parse table row with inline formatting
1337
+ cells = [cell.strip() for cell in line[1:-1].split("|")]
1338
+ formatted_cells = []
1339
+ for cell in cells:
1340
+ formatted_cells.append(
1341
+ {"text": cell, "formatted": self._parse_inline_formatting(cell)}
1342
+ )
1343
+ table_data["data"].append(formatted_cells)
1344
+ elif "|" in line and not line.startswith("|"):
1345
+ # Handle tables without outer pipes with inline formatting
1346
+ cells = [cell.strip() for cell in line.split("|")]
1347
+ formatted_cells = []
1348
+ for cell in cells:
1349
+ formatted_cells.append(
1350
+ {"text": cell, "formatted": self._parse_inline_formatting(cell)}
1351
+ )
1352
+ table_data["data"].append(formatted_cells)
1353
+ elif line.startswith("---") or line.startswith("==="):
1354
+ # Skip separator lines
1355
+ continue
1356
+
1357
+ return table_data
1358
+
1359
+ def _auto_parse_json_formatting(self, slide_data):
1360
+ """Auto-parse inline formatting in JSON slide data."""
1361
+ # Create a copy to avoid modifying original
1362
+ processed_data = slide_data.copy()
1363
+
1364
+ # Parse title if present
1365
+ if "title" in processed_data and processed_data["title"]:
1366
+ title_text = processed_data["title"]
1367
+ processed_data["title_formatted"] = self._parse_inline_formatting(title_text)
1368
+
1369
+ # Parse subtitle if present
1370
+ if "subtitle" in processed_data and processed_data["subtitle"]:
1371
+ subtitle_text = processed_data["subtitle"]
1372
+ processed_data["subtitle_formatted"] = self._parse_inline_formatting(subtitle_text)
1373
+
1374
+ # Parse content list if present
1375
+ if "content" in processed_data and isinstance(processed_data["content"], list):
1376
+ # Convert simple content to rich content with formatting
1377
+ rich_content = []
1378
+ for item in processed_data["content"]:
1379
+ if isinstance(item, str):
1380
+ # Treat as paragraph text
1381
+ rich_content.append({"paragraph": item})
1382
+ processed_data["rich_content"] = rich_content
1383
+ # Remove old content key to avoid conflicts
1384
+ del processed_data["content"]
1385
+
1386
+ # Parse table data if present
1387
+ if "table" in processed_data and "data" in processed_data["table"]:
1388
+ table_data = processed_data["table"]
1389
+ if isinstance(table_data["data"], list):
1390
+ formatted_data = []
1391
+ for row in table_data["data"]:
1392
+ if isinstance(row, list):
1393
+ formatted_row = []
1394
+ for cell in row:
1395
+ if isinstance(cell, str):
1396
+ formatted_row.append(
1397
+ {
1398
+ "text": cell,
1399
+ "formatted": self._parse_inline_formatting(cell),
1400
+ }
1401
+ )
1402
+ else:
1403
+ # Keep non-string cells as-is
1404
+ formatted_row.append(cell)
1405
+ formatted_data.append(formatted_row)
1406
+ else:
1407
+ # Keep non-list rows as-is
1408
+ formatted_data.append(row)
1409
+ processed_data["table"]["data"] = formatted_data
1410
+
1411
+ # Note: Removed complex formatting preprocessing - formatting now handled at render time
1412
+ return processed_data
1413
+
1414
def create_presentation_from_markdown(
    self,
    markdown_content: str,
    fileName: str = "Sample_Presentation",
    templateName: str = "default",
) -> str:
    """
    Create presentation from formatted markdown with frontmatter.

    Parses the markdown, creates the presentation, adds every slide and writes the
    result to disk.

    Returns:
        A status message; any exception is reported in the message rather than raised
    """
    try:
        parsed_slides = self.parse_markdown_with_frontmatter(markdown_content)

        # Create presentation, then add all slides to it
        self.create_presentation(templateName, fileName)
        for parsed_slide in parsed_slides:
            self._add_slide(parsed_slide)

        # Automatically save the presentation to disk after creation
        save_message = self.write_presentation(fileName)
        slide_count = len(parsed_slides)
    except Exception as e:
        return f"Error creating presentation from markdown: {str(e)}"

    return (
        f"Successfully created presentation with {slide_count} slides from markdown. "
        f"{save_message}"
    )
1440
+
1441
+ def _handle_image_placeholder(self, placeholder, field_name, field_value, slide_data):
1442
+ """
1443
+ Handle image insertion into PICTURE placeholders with smart fallback.
1444
+
1445
+ Args:
1446
+ placeholder: PowerPoint picture placeholder
1447
+ field_name: Name of the field (e.g., 'image_path', 'media.image_path')
1448
+ field_value: Image path or URL
1449
+ slide_data: Complete slide data for context
1450
+ """
1451
+ try:
1452
+ # Get placeholder dimensions for proper image sizing
1453
+ width = placeholder.width
1454
+ height = placeholder.height
1455
+ dimensions = (int(width.inches * 96), int(height.inches * 96)) # Convert to pixels
1456
+
1457
+ # Prepare context for consistent PlaceKitten generation
1458
+ context = {
1459
+ "layout": slide_data.get("layout", slide_data.get("type", "unknown")),
1460
+ "slide_index": getattr(self, "_current_slide_index", 0),
1461
+ }
1462
+
1463
+ # Try to use provided image path
1464
+ final_image_path = None
1465
+ if field_value and isinstance(field_value, str):
1466
+ # Validate and process the provided image
1467
+ if self.image_handler.validate_image(field_value):
1468
+ final_image_path = self.image_handler.process_image(
1469
+ field_value, dimensions, quality="high"
1470
+ )
1471
+ else:
1472
+ print(f"Warning: Invalid image path '{field_value}', using fallback")
1473
+
1474
+ # Generate PlaceKitten fallback if needed
1475
+ if not final_image_path:
1476
+ final_image_path = self.placekitten.generate_fallback(dimensions, context)
1477
+
1478
+ # Insert image into placeholder if we have a valid path
1479
+ if final_image_path and Path(final_image_path).exists():
1480
+ try:
1481
+ # Check if placeholder can accept images (not already filled)
1482
+ if hasattr(placeholder, "insert_picture"):
1483
+ # Insert image into the picture placeholder
1484
+ picture = placeholder.insert_picture(final_image_path)
1485
+
1486
+ # Preserve alt text if provided
1487
+ alt_text = slide_data.get("alt_text") or slide_data.get("media", {}).get(
1488
+ "alt_text"
1489
+ )
1490
+ if alt_text and hasattr(picture, "element"):
1491
+ # Set accessibility description
1492
+ picture.element.nvPicPr.cNvPr.descr = str(alt_text)
1493
+
1494
+ print(f"✅ Successfully inserted image into placeholder: {field_name}")
1495
+ else:
1496
+ msg = f"Warning: Placeholder {field_name} cannot accept images"
1497
+ print(msg)
1498
+ # Try to replace existing content if it's a picture shape
1499
+ if hasattr(placeholder, "element") and hasattr(
1500
+ placeholder.element, "nvPicPr"
1501
+ ):
1502
+ print(" Placeholder already contains an image, skipping...")
1503
+ elif hasattr(placeholder, "text_frame") and placeholder.text_frame:
1504
+ placeholder.text_frame.text = f"Image: {Path(final_image_path).name}"
1505
+
1506
+ except Exception as e:
1507
+ print(f"Warning: Failed to insert image into placeholder: {e}")
1508
+ # Fallback: add image path as text if insertion fails
1509
+ if hasattr(placeholder, "text_frame") and placeholder.text_frame:
1510
+ placeholder.text_frame.text = f"Image: {Path(final_image_path).name}"
1511
+
1512
+ else:
1513
+ print(f"Warning: No valid image available for placeholder {field_name}")
1514
+ # Fallback: show placeholder text
1515
+ if hasattr(placeholder, "text_frame") and placeholder.text_frame:
1516
+ placeholder.text_frame.text = "Image placeholder"
1517
+
1518
+ except Exception as e:
1519
+ print(f"Error handling image placeholder {field_name}: {e}")
1520
+ # Fallback: show error message in placeholder
1521
+ if hasattr(placeholder, "text_frame") and placeholder.text_frame:
1522
+ placeholder.text_frame.text = f"Image error: {field_name}"
1523
+
1524
+ def _get_placeholder_dimensions_pixels(self, placeholder):
1525
+ """
1526
+ Get placeholder dimensions in pixels for image processing.
1527
+
1528
+ Args:
1529
+ placeholder: PowerPoint placeholder object
1530
+
1531
+ Returns:
1532
+ tuple: (width, height) in pixels
1533
+ """
1534
+ try:
1535
+ # Convert EMU units to inches, then to pixels (96 DPI)
1536
+ width_pixels = int(placeholder.width.inches * 96)
1537
+ height_pixels = int(placeholder.height.inches * 96)
1538
+ return (width_pixels, height_pixels)
1539
+ except Exception:
1540
+ # Fallback to common slide dimensions
1541
+ return (800, 600)
1542
+
1543
+
1544
def get_deckbuilder_client():
    """Return the Deckbuilder client obtained by calling ``Deckbuilder()``."""
    # Per the original note this is the singleton instance of Deckbuilder; the
    # mechanism that enforces that lives outside this function
    client = Deckbuilder()
    return client