html2pic 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
html2pic/models.py ADDED
@@ -0,0 +1,168 @@
1
+ """
2
+ Data models for DOM nodes and CSS styles
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional, Union
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum
8
+
9
class NodeType(Enum):
    """Kinds of node that can appear in the parsed DOM tree."""
    ELEMENT = "element"
    TEXT = "text"


@dataclass
class DOMNode:
    """
    Represents a node in the parsed DOM tree.

    This is the internal representation of HTML elements and text nodes.

    Equality and repr deliberately ignore ``parent``: the parent links back
    to this node through its ``children`` list, so including it made the
    generated ``__eq__`` recurse without end when two structurally equal
    trees were compared.
    """
    node_type: NodeType
    tag: Optional[str] = None  # HTML tag name (div, span, p, etc.)
    attributes: Dict[str, str] = field(default_factory=dict)  # id, class, src, etc.
    text_content: str = ""  # Text content for text nodes
    children: List['DOMNode'] = field(default_factory=list)
    # Back-reference only; excluded from comparison/repr to break the
    # parent <-> children cycle.
    parent: Optional['DOMNode'] = field(default=None, repr=False, compare=False)

    # Computed styles (set by StyleEngine)
    computed_styles: Dict[str, Any] = field(default_factory=dict)

    def get_classes(self) -> List[str]:
        """
        Get the list of CSS classes for this element.

        Accepts both a space-separated string and a list-like value (for
        example BeautifulSoup's AttributeValueList) in ``attributes['class']``.

        Returns:
            Class names in document order; an empty list when there are none.
        """
        class_attr = self.attributes.get('class', '')

        # A present-but-None attribute must not turn into the class "None"
        if class_attr is None:
            return []

        # List-like value: flatten to one space-separated string first
        if hasattr(class_attr, '__iter__') and not isinstance(class_attr, str):
            class_attr = ' '.join(str(cls) for cls in class_attr)

        # str.split() with no argument already trims and drops empty tokens
        return str(class_attr).split()

    def get_id(self) -> Optional[str]:
        """Get the ID attribute for this element, or None when it has none."""
        return self.attributes.get('id')

    def is_element(self) -> bool:
        """Check if this is an element node."""
        return self.node_type == NodeType.ELEMENT

    def is_text(self) -> bool:
        """Check if this is a text node."""
        return self.node_type == NodeType.TEXT

    def has_text_content(self) -> bool:
        """Check if this node or any descendant holds non-whitespace text."""
        if self.is_text() and self.text_content.strip():
            return True
        return any(child.has_text_content() for child in self.children)

    def get_all_text(self) -> str:
        """
        Get all text content from this node and its children.

        Returns:
            The node's own text for a text node; otherwise the non-empty
            texts of all children joined with single spaces.
        """
        if self.is_text():
            return self.text_content

        child_texts = (child.get_all_text() for child in self.children)
        return ' '.join(text for text in child_texts if text)
72
+
73
@dataclass
class CSSRule:
    """
    One CSS rule: a selector together with its declaration block.

    Example: .my-class { color: red; font-size: 16px; }
    """
    # Selector text exactly as written, e.g. ".my-class"
    selector: str
    # Property -> value mapping, e.g. {"color": "red", "font-size": "16px"}
    declarations: Dict[str, str]
    # Specificity score used to order rules during cascade resolution
    specificity: int = 0
83
+
84
class SelectorType(Enum):
    """Kinds of CSS selector the engine understands."""
    TAG = "tag"        # div, p, h1
    CLASS = "class"    # .my-class
    ID = "id"          # #my-id
    UNIVERSAL = "*"    # * (universal selector)


@dataclass
class ParsedSelector:
    """
    Result of parsing a CSS selector string.

    Only simple selectors are handled here; anything that is not an id,
    class or universal selector is treated as a tag name.
    """
    selector_type: SelectorType
    value: str  # Selector value with the leading '.' or '#' removed

    @classmethod
    def parse(cls, selector: str) -> 'ParsedSelector':
        """
        Parse a simple CSS selector string.

        Args:
            selector: Selector text; surrounding whitespace is ignored.

        Returns:
            A ParsedSelector whose value keeps its case for ids and classes
            and is lower-cased for tag names.
        """
        text = selector.strip()
        prefix = text[:1]

        if prefix == '#':
            return cls(SelectorType.ID, text[1:])
        if prefix == '.':
            return cls(SelectorType.CLASS, text[1:])
        if text == '*':
            return cls(SelectorType.UNIVERSAL, '*')

        # Everything else is taken to be a tag selector
        return cls(SelectorType.TAG, text.lower())
116
+
117
# Type alias for the values a CSS property may carry
CSSValue = Union[str, int, float]

# Fallback value for every supported property, used when no rule sets one.
# Key order is: layout, box model, visual, typography, positioning.
DEFAULT_STYLES = {
    # --- Layout ---
    'display': 'block',
    'flex-direction': 'row',
    'justify-content': 'flex-start',
    'align-items': 'stretch',
    'gap': '0px',

    # --- Box model ---
    'width': 'auto',
    'height': 'auto',
    # padding-top / -right / -bottom / -left
    **{f'padding-{side}': '0px' for side in ('top', 'right', 'bottom', 'left')},
    # margin-top / -right / -bottom / -left
    **{f'margin-{side}': '0px' for side in ('top', 'right', 'bottom', 'left')},
    'border-width': '0px',
    'border-style': 'solid',
    'border-color': 'black',
    'border-radius': '0px',

    # --- Visual ---
    'background-color': 'transparent',
    'background-image': 'none',
    'background-size': 'cover',
    'box-shadow': 'none',

    # --- Typography ---
    'color': 'black',
    'font-family': 'Arial, sans-serif',
    'font-size': '16px',
    'font-weight': '400',
    'font-style': 'normal',
    'text-align': 'left',
    'line-height': '1.2',
    'text-decoration': 'none',
    'text-wrap': 'wrap',

    # --- Positioning (for future use) ---
    'position': 'static',
    **{edge: 'auto' for edge in ('top', 'right', 'bottom', 'left')},
}
@@ -0,0 +1,442 @@
1
+ """
2
+ Style computation engine - handles cascading, specificity, and inheritance
3
+ """
4
+
5
+ from typing import List, Dict, Any
6
+ from .models import DOMNode, CSSRule, ParsedSelector, SelectorType, DEFAULT_STYLES
7
+ from pictex import SolidColor
8
+ from .warnings_system import get_warning_collector, WarningCategory
9
+
10
class StyleEngine:
    """
    Computes final styles for DOM nodes by applying CSS rules.

    Handles:
    - Selector matching (which rules apply to which elements)
    - Cascade resolution (specificity, source order)
    - Property inheritance (color, font-family, etc.)
    - Unit conversion (px, em, rem, %)
    """

    # Properties that inherit from parent to child
    INHERITED_PROPERTIES = {
        'color', 'font-family', 'font-size', 'font-weight', 'font-style',
        'line-height', 'text-align', 'text-decoration', 'text-wrap'
    }

    # Keyword-only properties and the values accepted without a warning
    _KEYWORD_VALUES = {
        'display': ('none', 'block', 'inline', 'inline-block', 'flex'),
        'flex-direction': ('row', 'column', 'row-reverse', 'column-reverse'),
        'justify-content': ('flex-start', 'center', 'flex-end', 'space-between', 'space-around', 'space-evenly'),
        'align-items': ('flex-start', 'center', 'flex-end', 'stretch', 'baseline'),
        'text-align': ('left', 'center', 'right', 'justify'),
        'font-weight': ('normal', 'bold', 'bolder', 'lighter', '100', '200', '300', '400', '500', '600', '700', '800', '900'),
        'font-style': ('normal', 'italic', 'oblique'),
        'text-decoration': ('none', 'underline', 'overline', 'line-through'),
        'border-style': ('none', 'solid', 'dashed', 'dotted', 'double'),
        'position': ('static', 'relative', 'absolute', 'fixed', 'sticky'),
        'text-wrap': ('wrap', 'nowrap', 'balance'),
    }

    # Keywords that are passed through untouched wherever a length is expected
    _LENGTH_KEYWORDS = ('auto', 'none', 'inherit', 'initial')

    # Length properties converted to pixels by _normalize_styles
    # (font-size is converted separately, before these)
    _CONVERTED_LENGTH_PROPERTIES = (
        'width', 'height', 'padding-top', 'padding-right', 'padding-bottom', 'padding-left',
        'margin-top', 'margin-right', 'margin-bottom', 'margin-left',
        'border-width', 'border-radius', 'gap',
    )

    # Length properties whose syntax is checked by _validate_css_value.
    # All four position offsets are listed so they are treated alike.
    _VALIDATED_LENGTH_PROPERTIES = (
        'width', 'height', 'padding', 'padding-top', 'padding-right', 'padding-bottom', 'padding-left',
        'margin', 'margin-top', 'margin-right', 'margin-bottom', 'margin-left',
        'border-width', 'border-radius', 'font-size', 'gap', 'left', 'top', 'right', 'bottom',
    )

    _COLOR_PROPERTIES = ('color', 'background-color', 'border-color')

    # Colour names accepted by _is_valid_color without a warning
    _NAMED_COLORS = frozenset((
        'black', 'white', 'red', 'green', 'blue', 'yellow', 'purple', 'orange',
        'gray', 'grey', 'pink', 'brown', 'cyan', 'magenta', 'transparent',
    ))

    def __init__(self, base_font_size: int = 16):
        """
        Initialize the style engine.

        Args:
            base_font_size: Base font size in pixels for em/rem calculations
        """
        self.base_font_size = base_font_size
        self.warnings = get_warning_collector()

    def apply_styles(self, dom_tree: "DOMNode", css_rules: "List[CSSRule]") -> "DOMNode":
        """
        Apply CSS rules to a DOM tree, computing final styles for each node.

        Args:
            dom_tree: Root DOM node
            css_rules: List of parsed CSS rules

        Returns:
            The same DOM tree, with computed_styles populated on every node
        """
        # The root has no parent, so it inherits nothing
        self._apply_styles_recursive(dom_tree, css_rules, parent_styles={})
        return dom_tree

    def _apply_styles_recursive(self, node: "DOMNode", css_rules: "List[CSSRule]", parent_styles: Dict[str, Any]):
        """
        Recursively apply styles to a node and its children.

        Args:
            node: Current DOM node
            css_rules: CSS rules to apply
            parent_styles: Computed styles from parent node (for inheritance)
        """
        # Layer 1: defaults
        computed_styles = DEFAULT_STYLES.copy()

        # Layer 2: values inherited from the parent's computed styles
        for prop in self.INHERITED_PROPERTIES:
            if prop in parent_styles:
                computed_styles[prop] = parent_styles[prop]

        # Layer 3: matching rules, lowest specificity first. The sort is
        # stable, so rules of equal specificity keep their source order and
        # the later one wins.
        matching_rules = self._find_matching_rules(node, css_rules)
        matching_rules.sort(key=lambda rule: rule.specificity)
        for rule in matching_rules:
            computed_styles.update(rule.declarations)

        # Convert units and normalize values
        computed_styles = self._normalize_styles(computed_styles, parent_styles)
        node.computed_styles = computed_styles

        # Children inherit from this node's final, normalized styles
        for child in node.children:
            self._apply_styles_recursive(child, css_rules, computed_styles)

    def _find_matching_rules(self, node: "DOMNode", css_rules: "List[CSSRule]") -> "List[CSSRule]":
        """
        Find all CSS rules that match the given DOM node.

        Args:
            node: DOM node to match against
            css_rules: List of CSS rules

        Returns:
            New list of the matching rules, in their original order
        """
        return [rule for rule in css_rules if self._selector_matches_node(rule.selector, node)]

    def _selector_matches_node(self, selector: str, node: "DOMNode") -> bool:
        """
        Check if a CSS selector matches a DOM node.

        Only simple selectors are supported:
        - tag: div, p, h1
        - class: .my-class
        - id: #my-id
        - universal: *

        Args:
            selector: CSS selector string
            node: DOM node to test

        Returns:
            True if selector matches the node; always False for non-element
            nodes and for selectors that cannot be parsed
        """
        if not node.is_element():
            return False

        try:
            parsed = ParsedSelector.parse(selector)

            if parsed.selector_type == SelectorType.UNIVERSAL:
                return True
            if parsed.selector_type == SelectorType.TAG:
                return node.tag == parsed.value
            if parsed.selector_type == SelectorType.CLASS:
                return parsed.value in node.get_classes()
            if parsed.selector_type == SelectorType.ID:
                return node.get_id() == parsed.value
            return False

        except Exception:
            # Deliberate best effort: a selector we cannot handle is treated
            # as "does not match" rather than aborting the whole render.
            return False

    def _normalize_styles(self, styles: Dict[str, Any], parent_styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Normalize and convert style values.

        - Validate values (warnings only; nothing is rejected)
        - Convert lengths to pixels
        - Map display to the internal layout vocabulary
        - Clean up color values

        Args:
            styles: Raw style dictionary
            parent_styles: Parent's computed styles (for relative units)

        Returns:
            Normalized copy of the style dictionary; ``styles`` is not modified
        """
        normalized = styles.copy()

        # Validate CSS values and warn about unexpected ones
        for prop, value in normalized.items():
            if isinstance(value, str):
                self._validate_css_value(prop, value)

        fallback_font_size = f'{self.base_font_size}px'

        # Convert font-size first (needed for em calculations). An em or
        # percentage font-size is relative to the PARENT's font size, so the
        # parent value is passed as both the percentage base and the em base.
        if 'font-size' in normalized:
            parent_font_size = parent_styles.get('font-size', fallback_font_size)
            normalized['font-size'] = self._convert_to_pixels(
                normalized['font-size'],
                parent_font_size,
                'font-size',
                font_size=parent_font_size
            )

        # For every other length, em is relative to this node's own font size
        own_font_size = normalized.get('font-size', fallback_font_size)
        for prop in self._CONVERTED_LENGTH_PROPERTIES:
            if prop in normalized:
                normalized[prop] = self._convert_to_pixels(
                    normalized[prop],
                    parent_styles.get(prop, '0px'),
                    prop,
                    font_size=own_font_size
                )

        # Normalize display property to our layout system
        normalized['display'] = self._normalize_display(normalized.get('display', 'block'))

        # Clean up color values
        for color_prop in self._COLOR_PROPERTIES:
            if color_prop in normalized:
                normalized[color_prop] = self._normalize_color(normalized[color_prop])

        return normalized

    def _convert_to_pixels(self, value: str, parent_value: str = '0px', property_name: str = '', font_size: str = '16px') -> str:
        """
        Convert CSS length values to pixels.

        Supports: px, em, rem, % and bare numbers (taken as pixels).
        Keywords such as 'auto', unsupported units and unparsable values are
        returned unchanged (trimmed and lower-cased).

        Args:
            value: Length to convert; non-strings are returned as str(value)
            parent_value: Parent's value for the same property, the base for
                percentages (only used when it is a pixel value)
            property_name: Name of the property being converted (unused in
                the conversion itself)
            font_size: Pixel font size that 'em' is relative to; falls back
                to base_font_size when it is not a pixel value

        Returns:
            The converted length as a string
        """
        if not isinstance(value, str):
            return str(value)

        value = value.strip().lower()

        # Already in pixels or special values
        if value.endswith('px') or value in self._LENGTH_KEYWORDS:
            return value

        # rem (relative to root font size). This MUST be tested before 'em':
        # every 'rem' value also ends with 'em'.
        if value.endswith('rem'):
            try:
                return f'{float(value[:-3]) * self.base_font_size}px'
            except ValueError:
                return value

        # em (relative to font size)
        if value.endswith('em'):
            try:
                em_value = float(value[:-2])
                if isinstance(font_size, str) and font_size.endswith('px'):
                    base_size = float(font_size.rstrip('px'))
                else:
                    base_size = self.base_font_size
                return f'{em_value * base_size}px'
            except ValueError:
                return value

        # Percentage (relative to the parent's value for the same property)
        if value.endswith('%'):
            try:
                percent_value = float(value[:-1])
                if isinstance(parent_value, str) and parent_value.endswith('px'):
                    parent_px = float(parent_value[:-2])
                    return f'{(percent_value / 100) * parent_px}px'
                return value  # Can't convert without pixel parent value
            except ValueError:
                return value

        # If it's just a number, assume pixels
        try:
            return f'{float(value)}px'
        except ValueError:
            return value

    def _normalize_display(self, display_value: str) -> str:
        """
        Normalize display property to values we understand.

        Maps CSS display values to the internal system:
        - flex -> flex
        - block, div -> block
        - inline, inline-block, span -> inline
        - anything else -> block
        """
        display_value = display_value.strip().lower()

        if display_value == 'flex':
            return 'flex'
        if display_value in ('inline', 'inline-block', 'span'):
            return 'inline'
        # NOTE(review): 'none' also ends up here and becomes 'block', so a
        # display:none element is not hidden by this step - confirm whether
        # the translator is expected to handle hiding.
        return 'block'

    def _normalize_color(self, color_value: str) -> str:
        """
        Normalize color values for PicTex compatibility.

        rgb()/rgba() values are converted to hex; everything else (hex codes,
        named colors, 'transparent') is returned trimmed and lower-cased.
        Non-string input yields 'black'.
        """
        if not isinstance(color_value, str):
            return 'black'

        color_value = color_value.strip().lower()

        # Kept as-is; the translator is expected to handle transparency
        if color_value == 'transparent':
            return 'transparent'

        if color_value.startswith('rgba(') or color_value.startswith('rgb('):
            parsed_color = self._parse_rgba_color(color_value)
            # 'black' is the parser's failure sentinel: a successfully parsed
            # black comes back as '#000000'
            if parsed_color == 'black':
                self.warnings.warn_color_fallback(
                    color_value, 'black', 'Failed to parse RGBA/RGB color'
                )
            return parsed_color

        return color_value  # Return as-is for hex codes and named colors

    def _validate_css_value(self, property_name: str, value: str) -> bool:
        """
        Validate CSS property values and warn about unexpected values.

        Validation is advisory: a warning is recorded but the value is still
        processed by the caller.

        Args:
            property_name: CSS property name
            value: Raw property value

        Returns:
            True if the value is valid, False if a warning was recorded
        """
        import re

        value = value.strip().lower()

        # Properties restricted to a fixed set of keywords
        allowed = self._KEYWORD_VALUES.get(property_name)
        if allowed is not None and value not in allowed:
            self.warnings.warn(
                f"Unexpected value '{value}' for CSS property '{property_name}'. Expected one of: {', '.join(allowed)}",
                WarningCategory.CSS_PARSING,
                {'property': property_name, 'value': value, 'expected': list(allowed)}
            )
            return False

        # Length values: number + unit, a bare integer, or a keyword
        if property_name in self._VALIDATED_LENGTH_PROPERTIES and value not in self._LENGTH_KEYWORDS:
            if not re.match(r'^-?(\d+\.?\d*|\.\d+)(px|em|rem|%|in|cm|mm|pt|pc)$|^\d+$', value):
                self.warnings.warn(
                    f"Invalid length value '{value}' for CSS property '{property_name}'. Expected format: number + unit (px, em, rem, %) or keywords (auto, none)",
                    WarningCategory.CSS_PARSING,
                    {'property': property_name, 'value': value}
                )
                return False

        # Color values
        if property_name in self._COLOR_PROPERTIES and not self._is_valid_color(value):
            self.warnings.warn(
                f"Invalid color value '{value}' for CSS property '{property_name}'",
                WarningCategory.CSS_PARSING,
                {'property': property_name, 'value': value}
            )
            return False

        return True

    def _is_valid_color(self, color: str) -> bool:
        """
        Check if a color value is valid.

        Accepts the known color names, 3- or 6-digit hex codes, and anything
        that starts like an rgb()/rgba()/hsl()/hsla() function (the function
        arguments are not inspected here).
        """
        import re

        color = color.strip().lower()

        if color in self._NAMED_COLORS:
            return True

        if re.match(r'^#([0-9a-f]{3}|[0-9a-f]{6})$', color):
            return True

        # Functional notations; hsl is not fully supported but is valid CSS
        return color.startswith(('rgb(', 'rgba(', 'hsl(', 'hsla('))

    def _parse_rgba_color(self, rgba_string: str) -> str:
        """
        Parse rgba() and rgb() color values and convert to hex format.

        Channels are clamped to 0-255. Partial transparency is dropped: only
        an alpha of 0.01 or less changes the result.

        Args:
            rgba_string: Color string like 'rgba(255, 0, 0, 0.5)' or 'rgb(255, 0, 0)'

        Returns:
            Hex color string like '#ff0000', 'transparent' for fully
            transparent colors, or 'black' when the value cannot be parsed
        """
        # Remove function name and parentheses, keep only the values
        values_str = rgba_string.replace('rgba(', '').replace('rgb(', '').replace(')', '').strip()
        values = [val.strip() for val in values_str.split(',')]

        try:
            r, g, b = (max(0, min(255, int(float(channel)))) for channel in values[:3])

            # Alpha defaults to opaque for rgb()
            alpha = float(values[3]) if len(values) >= 4 else 1.0
        except (ValueError, IndexError, OverflowError):
            # ValueError: non-numeric channel, NaN, or fewer than 3 channels
            # OverflowError: infinite channel value
            return 'black'

        # If fully transparent, return transparent
        if alpha <= 0.01:  # Allow for floating point precision issues
            return 'transparent'

        # Partial transparency is ignored: only the RGB part is returned
        return f'#{r:02x}{g:02x}{b:02x}'