deckbuilder 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,291 @@
1
+ """
2
+ ImageHandler - Core image file validation, processing, and management.
3
+
4
+ This module provides the ImageHandler class for validating image files,
5
+ processing them for PowerPoint placeholders, and managing cached fallback images.
6
+ """
7
+
8
+ import hashlib
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Optional, Tuple
12
+
13
+ from PIL import Image
14
+
15
+
16
class ImageHandler:
    """
    Core image file validation, processing, and management.

    Validates image files, converts and resizes them to fit PowerPoint
    placeholders, and stores the processed JPEGs in an on-disk cache so
    repeated requests for the same source/size/quality are served from disk.
    """

    def __init__(self, cache_dir: str = "temp/image_cache"):
        """
        Initialize ImageHandler with cache directory.

        Args:
            cache_dir: Directory for caching processed images. If it cannot be
                created, a "deckbuilder_image_cache" directory under the
                system temp directory is used instead.
        """
        self.cache_dir = Path(cache_dir)
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Fallback to a temporary directory if cache_dir is not writable
            import tempfile

            self.cache_dir = Path(tempfile.gettempdir()) / "deckbuilder_image_cache"
            self.cache_dir.mkdir(parents=True, exist_ok=True)

        # File extensions accepted by validate_image()
        self.supported_formats = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif"}

        # JPEG quality value used for each named quality level
        self.quality_settings = {"high": 95, "medium": 85, "low": 70}

    def validate_image(self, image_path: str) -> bool:
        """
        Validate image file existence, format, and accessibility.

        Args:
            image_path: Path to image file to validate

        Returns:
            bool: True if the path is an existing regular file with a supported
            extension that PIL can open and verify, False otherwise
        """
        if not image_path:
            return False

        try:
            path = Path(image_path)

            # Must exist and be a regular file (not a directory)
            if not path.exists() or not path.is_file():
                return False

            # Extension check is case-insensitive
            if path.suffix.lower() not in self.supported_formats:
                return False

            # Let PIL check the file's integrity
            with Image.open(path) as img:
                img.verify()

            return True

        except Exception:
            # Any exception during validation means invalid image
            return False

    def get_image_dimensions(self, image_path: str) -> Optional[Tuple[int, int]]:
        """
        Get image dimensions.

        Args:
            image_path: Path to image file

        Returns:
            Tuple of (width, height), or None if the image cannot be opened
        """
        try:
            with Image.open(image_path) as img:
                return img.size
        except Exception:
            return None

    def process_image(
        self, image_path: str, target_dimensions: Tuple[int, int], quality: str = "high"
    ) -> Optional[str]:
        """
        Process and resize image to target dimensions for PowerPoint placeholder.

        Args:
            image_path: Path to source image file
            target_dimensions: Target (width, height) for placeholder
            quality: Quality level ('high', 'medium', 'low')

        Returns:
            str: Path to processed image file, or None if the image is invalid
            or processing failed (a warning is printed in the latter case)
        """
        if not self.validate_image(image_path):
            return None

        try:
            target_width, target_height = target_dimensions

            # Reuse a previously processed result when one exists
            cache_key = self._generate_cache_key(image_path, target_dimensions, quality)
            cached_path = self._get_cached_image(cache_key)
            if cached_path:
                return cached_path

            with Image.open(image_path) as img:
                # Output is always JPEG, which requires RGB pixel data
                rgb_img = img if img.mode == "RGB" else img.convert("RGB")

                # Scale to fit inside the target box, preserving aspect ratio
                processed_img = self._resize_with_aspect_ratio(
                    rgb_img, target_width, target_height
                )

                output_path = self._save_processed_image(processed_img, cache_key, quality)

                return str(output_path)

        except Exception as e:
            print(f"Warning: Image processing failed for {image_path}: {e}")
            return None

    def _resize_with_aspect_ratio(
        self, img: "Image.Image", target_width: int, target_height: int
    ) -> "Image.Image":
        """
        Resize image to fit target dimensions while preserving aspect ratio.

        Args:
            img: PIL Image object
            target_width: Target width (must be positive)
            target_height: Target height (must be positive)

        Returns:
            PIL Image resized to fit within target dimensions

        Raises:
            ValueError: If either target dimension is not positive
        """
        if target_width <= 0 or target_height <= 0:
            raise ValueError(
                f"Target dimensions must be positive, got {target_width}x{target_height}"
            )

        orig_width, orig_height = img.size

        # Use the smaller ratio to ensure the image fits within both bounds
        scale_ratio = min(target_width / orig_width, target_height / orig_height)

        # Truncation can yield 0 for very elongated images; a zero-sized
        # resize is rejected by PIL, so clamp each side to at least 1 pixel.
        new_width = max(1, int(orig_width * scale_ratio))
        new_height = max(1, int(orig_height * scale_ratio))

        # Resize using high-quality resampling
        return img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    def _generate_cache_key(
        self, image_path: str, dimensions: Tuple[int, int], quality: str
    ) -> str:
        """
        Generate unique cache key for processed image.

        Args:
            image_path: Source image path
            dimensions: Target dimensions
            quality: Quality setting

        Returns:
            str: MD5 hex digest of the path, dimensions, quality, and the
            file's modification time (0 if the mtime cannot be read)
        """
        # Include file modification time so edits to the source invalidate the key
        try:
            mtime = os.path.getmtime(image_path)
        except OSError:
            mtime = 0

        key_data = f"{image_path}_{dimensions[0]}x{dimensions[1]}_{quality}_{mtime}"
        # MD5 is used only as a filename fingerprint, not for security
        return hashlib.md5(key_data.encode(), usedforsecurity=False).hexdigest()  # nosec

    def _get_cached_image(self, cache_key: str) -> Optional[str]:
        """
        Retrieve cached processed image if available.

        Args:
            cache_key: Cache key for the image

        Returns:
            str: Path to cached image, or None if not cached
        """
        cached_path = self.cache_dir / f"{cache_key}.jpg"
        return str(cached_path) if cached_path.exists() else None

    def _save_processed_image(self, img: "Image.Image", cache_key: str, quality: str) -> Path:
        """
        Save processed image to cache with specified quality.

        The image is written to a temporary file in the cache directory and
        then moved into place, so a failed or interrupted save never leaves a
        truncated "<cache_key>.jpg" that a later cache lookup would return.

        Args:
            img: PIL Image to save
            cache_key: Cache key for filename
            quality: Quality level for JPEG compression; unknown levels fall
                back to quality 95

        Returns:
            Path: Path to saved image file
        """
        output_path = self.cache_dir / f"{cache_key}.jpg"
        jpeg_quality = self.quality_settings.get(quality, 95)

        # The ".tmp" suffix keeps partial files out of the "*.jpg" cache globs
        tmp_path = self.cache_dir / f"{cache_key}.{os.getpid()}.tmp"
        try:
            img.save(tmp_path, "JPEG", quality=jpeg_quality, optimize=True)
            os.replace(tmp_path, output_path)
        finally:
            # No-op after a successful replace; removes leftovers on failure
            tmp_path.unlink(missing_ok=True)

        return output_path

    def cleanup_cache(self, max_size_mb: int = 100):
        """
        Clean up cache directory if it exceeds maximum size.

        Files are removed in order of oldest access time first until the total
        size of the remaining "*.jpg" files is within the limit. Failures are
        reported with a printed warning rather than raised.

        Args:
            max_size_mb: Maximum cache size in megabytes
        """
        try:
            # Snapshot (atime, size, path) once so every decision below uses
            # the same view; entries that vanish mid-scan are skipped.
            entries = []
            for candidate in self.cache_dir.glob("*.jpg"):
                try:
                    if not candidate.is_file():
                        continue
                    info = candidate.stat()
                except OSError:
                    continue
                entries.append((info.st_atime, info.st_size, candidate))

            max_size_bytes = max_size_mb * 1024 * 1024
            current_size = sum(size for _, size, _ in entries)
            if current_size <= max_size_bytes:
                return

            # Least recently accessed first
            entries.sort(key=lambda entry: entry[0])

            for _, file_size, file_path in entries:
                if current_size <= max_size_bytes:
                    break
                try:
                    file_path.unlink()
                except FileNotFoundError:
                    # Already removed by someone else; its space is freed anyway
                    pass
                current_size -= file_size

        except Exception as e:
            print(f"Warning: Cache cleanup failed: {e}")

    def get_cache_stats(self) -> dict:
        """
        Get statistics about the image cache.

        Returns:
            dict: "file_count" (number of regular "*.jpg" files), "total_size_mb"
            (their combined size, rounded to 2 decimals), and "cache_dir"
        """
        try:
            # Count and size use the same filter so the two figures agree
            sizes = [
                f.stat().st_size for f in self.cache_dir.glob("*.jpg") if f.is_file()
            ]

            return {
                "file_count": len(sizes),
                "total_size_mb": round(sum(sizes) / (1024 * 1024), 2),
                "cache_dir": str(self.cache_dir),
            }
        except Exception:
            return {"file_count": 0, "total_size_mb": 0.0, "cache_dir": str(self.cache_dir)}
@@ -0,0 +1,288 @@
1
+ {
2
+ "layout_intelligence": {
3
+ "version": "1.0",
4
+ "description": "Semantic metadata for content-first layout recommendations using convention-based placeholder names",
5
+ "last_updated": "2025-06-23"
6
+ },
7
+ "content_patterns": {
8
+ "intent_recognition": {
9
+ "comparison": {
10
+ "keywords": ["vs", "versus", "compare", "contrast", "option", "alternative", "pros", "cons"],
11
+ "structure": ["paired_content", "left_right", "side_by_side"],
12
+ "layouts": ["Comparison", "Two Content"],
13
+ "confidence": 0.9
14
+ },
15
+ "overview": {
16
+ "keywords": ["overview", "summary", "introduction", "agenda", "outline", "roadmap"],
17
+ "structure": ["list_items", "bullet_points", "numbered_items"],
18
+ "layouts": ["Title and Content", "Agenda, 6 Textboxes"],
19
+ "confidence": 0.8
20
+ },
21
+ "features": {
22
+ "keywords": ["feature", "benefit", "capability", "function", "advantage"],
23
+ "structure": ["multiple_columns", "categorized_content"],
24
+ "layouts": ["Four Columns With Titles", "Three Columns With Titles"],
25
+ "confidence": 0.85
26
+ },
27
+ "process": {
28
+ "keywords": ["step", "process", "workflow", "procedure", "method"],
29
+ "structure": ["sequential", "numbered", "timeline"],
30
+ "layouts": ["Agenda, 6 Textboxes", "Title and 6-item Lists"],
31
+ "confidence": 0.8
32
+ },
33
+ "statistics": {
34
+ "keywords": ["number", "percent", "metric", "data", "result", "performance"],
35
+ "structure": ["big_number", "key_metric", "single_focus"],
36
+ "layouts": ["Big Number", "Title and Content"],
37
+ "confidence": 0.9
38
+ },
39
+ "analysis": {
40
+ "keywords": ["swot", "analysis", "strength", "weakness", "opportunity", "threat"],
41
+ "structure": ["four_quadrant", "categorized_analysis"],
42
+ "layouts": ["SWOT Analysis", "Four Columns With Titles"],
43
+ "confidence": 0.95
44
+ }
45
+ },
46
+ "content_type_mapping": {
47
+ "title_content": {
48
+ "placeholders": ["title_top_1"],
49
+ "detection": ["single_heading", "main_title"],
50
+ "layouts": ["Title Slide", "Title Only"]
51
+ },
52
+ "subtitle_content": {
53
+ "placeholders": ["subtitle_1"],
54
+ "detection": ["secondary_heading", "tagline"],
55
+ "layouts": ["Title Slide"]
56
+ },
57
+ "body_content": {
58
+ "placeholders": ["content_1", "content_main_1"],
59
+ "detection": ["paragraph", "bullet_list", "description"],
60
+ "layouts": ["Title and Content", "Section Header"]
61
+ },
62
+ "column_content": {
63
+ "placeholders": ["content_col1_1", "content_col2_1", "content_col3_1", "content_col4_1"],
64
+ "detection": ["multiple_sections", "categorized_items", "parallel_content"],
65
+ "layouts": ["Four Columns With Titles", "Three Columns With Titles", "Four Columns", "Three Columns"]
66
+ },
67
+ "comparison_content": {
68
+ "placeholders": ["content_left_1", "content_right_1", "title_left_1", "title_right_1"],
69
+ "detection": ["paired_items", "versus_content", "side_by_side"],
70
+ "layouts": ["Comparison", "Two Content"]
71
+ },
72
+ "image_content": {
73
+ "placeholders": ["image_1", "image_main_1"],
74
+ "detection": ["image_reference", "media_placeholder", "visual_content"],
75
+ "layouts": ["Picture with Caption"]
76
+ },
77
+ "caption_content": {
78
+ "placeholders": ["text_caption_1"],
79
+ "detection": ["image_description", "caption_text"],
80
+ "layouts": ["Picture with Caption", "Content with Caption"]
81
+ },
82
+ "agenda_content": {
83
+ "placeholders": ["number_item1_1", "content_item1_1", "number_item2_1", "content_item2_1"],
84
+ "detection": ["numbered_list", "agenda_items", "sequential_content"],
85
+ "layouts": ["Agenda, 6 Textboxes", "Title and 6-item Lists"]
86
+ }
87
+ }
88
+ },
89
+ "layout_compatibility": {
90
+ "Title Slide": {
91
+ "optimal_for": ["presentation_intro", "section_divider", "title_only"],
92
+ "placeholders": {
93
+ "required": ["title_top_1"],
94
+ "optional": ["subtitle_1", "date_footer_1", "footer_footer_1", "slide_number_footer_1"]
95
+ },
96
+ "content_hints": {
97
+ "title_top_1": "Main presentation title or section heading",
98
+ "subtitle_1": "Supporting tagline or description"
99
+ },
100
+ "confidence_factors": {
101
+ "single_heading": 0.9,
102
+ "short_title": 0.8,
103
+ "no_body_content": 0.85
104
+ }
105
+ },
106
+ "Title and Content": {
107
+ "optimal_for": ["general_content", "bullet_lists", "descriptions", "single_topic"],
108
+ "placeholders": {
109
+ "required": ["title_top_1", "content_1"],
110
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
111
+ },
112
+ "content_hints": {
113
+ "title_top_1": "Slide topic or main point",
114
+ "content_1": "Bullet points, paragraphs, or detailed content"
115
+ },
116
+ "confidence_factors": {
117
+ "single_content_block": 0.8,
118
+ "bullet_list": 0.85,
119
+ "paragraph_content": 0.7
120
+ }
121
+ },
122
+ "Comparison": {
123
+ "optimal_for": ["vs_content", "pros_cons", "before_after", "option_analysis"],
124
+ "placeholders": {
125
+ "required": ["title_top_1", "title_left_1", "content_left_1", "title_right_1", "content_right_1"],
126
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
127
+ },
128
+ "content_hints": {
129
+ "title_top_1": "Comparison topic",
130
+ "title_left_1": "Left option title",
131
+ "content_left_1": "Left option details",
132
+ "title_right_1": "Right option title",
133
+ "content_right_1": "Right option details"
134
+ },
135
+ "confidence_factors": {
136
+ "paired_content": 0.95,
137
+ "vs_keywords": 0.9,
138
+ "comparison_structure": 0.85
139
+ }
140
+ },
141
+ "Four Columns With Titles": {
142
+ "optimal_for": ["feature_grid", "benefit_overview", "four_categories", "quadrant_analysis"],
143
+ "placeholders": {
144
+ "required": ["title_top_1", "title_col1_1", "content_col1_1", "title_col2_1", "content_col2_1", "title_col3_1", "content_col3_1", "title_col4_1", "content_col4_1"],
145
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
146
+ },
147
+ "content_hints": {
148
+ "title_top_1": "Overall category or topic",
149
+ "title_col1_1": "First category title",
150
+ "content_col1_1": "First category details",
151
+ "title_col2_1": "Second category title",
152
+ "content_col2_1": "Second category details",
153
+ "title_col3_1": "Third category title",
154
+ "content_col3_1": "Third category details",
155
+ "title_col4_1": "Fourth category title",
156
+ "content_col4_1": "Fourth category details"
157
+ },
158
+ "confidence_factors": {
159
+ "four_items": 0.95,
160
+ "categorized_content": 0.9,
161
+ "parallel_structure": 0.85
162
+ }
163
+ },
164
+ "Three Columns With Titles": {
165
+ "optimal_for": ["three_categories", "feature_comparison", "process_steps"],
166
+ "placeholders": {
167
+ "required": ["title_top_1", "title_col1_1", "content_col1_1", "title_col2_1", "content_col2_1", "title_col3_1", "content_col3_1"],
168
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
169
+ },
170
+ "content_hints": {
171
+ "title_top_1": "Overall topic",
172
+ "title_col1_1": "First category",
173
+ "content_col1_1": "First details",
174
+ "title_col2_1": "Second category",
175
+ "content_col2_1": "Second details",
176
+ "title_col3_1": "Third category",
177
+ "content_col3_1": "Third details"
178
+ },
179
+ "confidence_factors": {
180
+ "three_items": 0.9,
181
+ "categorized_content": 0.85,
182
+ "balanced_content": 0.8
183
+ }
184
+ },
185
+ "Picture with Caption": {
186
+ "optimal_for": ["visual_content", "image_explanation", "diagram_description"],
187
+ "placeholders": {
188
+ "required": ["title_top_1", "image_1", "text_caption_1"],
189
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
190
+ },
191
+ "content_hints": {
192
+ "title_top_1": "Image or visual topic",
193
+ "image_1": "Main image or visual content",
194
+ "text_caption_1": "Image description or explanation"
195
+ },
196
+ "confidence_factors": {
197
+ "image_reference": 0.95,
198
+ "visual_content": 0.9,
199
+ "media_mention": 0.8
200
+ }
201
+ },
202
+ "Agenda, 6 Textboxes": {
203
+ "optimal_for": ["meeting_agenda", "process_steps", "numbered_items", "timeline"],
204
+ "placeholders": {
205
+ "required": ["title_top_1", "number_item1_1", "content_item1_1", "number_item2_1", "content_item2_1"],
206
+ "optional": ["number_item3_1", "content_item3_1", "number_item4_1", "content_item4_1", "number_item5_1", "content_item5_1", "number_item6_1", "content_item6_1", "date_footer_1", "footer_footer_1", "slide_number_footer_1"]
207
+ },
208
+ "content_hints": {
209
+ "title_top_1": "Agenda or process title",
210
+ "number_item1_1": "Item number (01, 1, etc.)",
211
+ "content_item1_1": "Item description"
212
+ },
213
+ "confidence_factors": {
214
+ "numbered_list": 0.9,
215
+ "agenda_keywords": 0.85,
216
+ "sequential_content": 0.8
217
+ }
218
+ },
219
+ "Big Number": {
220
+ "optimal_for": ["key_metric", "important_statistic", "single_number_focus"],
221
+ "placeholders": {
222
+ "required": ["title_top_1", "content_1"],
223
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
224
+ },
225
+ "content_hints": {
226
+ "title_top_1": "Metric description",
227
+ "content_1": "Large number or percentage with context"
228
+ },
229
+ "confidence_factors": {
230
+ "percentage": 0.95,
231
+ "large_number": 0.9,
232
+ "metric_keywords": 0.85
233
+ }
234
+ },
235
+ "SWOT Analysis": {
236
+ "optimal_for": ["swot_analysis", "four_quadrant_analysis", "strategic_planning"],
237
+ "placeholders": {
238
+ "required": ["content_16", "content_17", "content_18", "content_19"],
239
+ "optional": ["date_footer_1", "footer_footer_1", "slide_number_footer_1"]
240
+ },
241
+ "content_hints": {
242
+ "content_16": "Strengths",
243
+ "content_17": "Weaknesses",
244
+ "content_18": "Opportunities",
245
+ "content_19": "Threats"
246
+ },
247
+ "confidence_factors": {
248
+ "swot_keywords": 0.98,
249
+ "four_categories": 0.9,
250
+ "analysis_structure": 0.85
251
+ }
252
+ }
253
+ },
254
+ "recommendation_engine": {
255
+ "scoring_weights": {
256
+ "content_structure": 0.4,
257
+ "keyword_matching": 0.3,
258
+ "intent_recognition": 0.2,
259
+ "layout_compatibility": 0.1
260
+ },
261
+ "minimum_confidence": 0.6,
262
+ "fallback_layouts": ["Title and Content", "Title Slide"],
263
+ "recommendation_rules": {
264
+ "multiple_recommendations": true,
265
+ "max_recommendations": 3,
266
+ "include_alternatives": true,
267
+ "explain_reasoning": true
268
+ }
269
+ },
270
+ "optimization_hints": {
271
+ "content_length": {
272
+ "title_top_1": "Keep under 60 characters for readability",
273
+ "subtitle_1": "Aim for 80-120 characters",
274
+ "content_areas": "Use bullet points for lists over 3 items",
275
+ "column_content": "Balance content length across columns"
276
+ },
277
+ "formatting_suggestions": {
278
+ "emphasis": "Use **bold** for key terms, *italic* for emphasis",
279
+ "structure": "Use consistent hierarchy with #, ##, ###",
280
+ "lists": "Use - for bullets, 1. for numbered items"
281
+ },
282
+ "layout_specific": {
283
+ "Comparison": "Keep left/right content balanced in length",
284
+ "Four Columns With Titles": "Aim for parallel structure across columns",
285
+ "Agenda, 6 Textboxes": "Use consistent numbering format (01, 02, etc.)"
286
+ }
287
+ }
288
+ }