haoline 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. haoline/.streamlit/config.toml +10 -0
  2. haoline/__init__.py +248 -0
  3. haoline/analyzer.py +935 -0
  4. haoline/cli.py +2712 -0
  5. haoline/compare.py +811 -0
  6. haoline/compare_visualizations.py +1564 -0
  7. haoline/edge_analysis.py +525 -0
  8. haoline/eval/__init__.py +131 -0
  9. haoline/eval/adapters.py +844 -0
  10. haoline/eval/cli.py +390 -0
  11. haoline/eval/comparison.py +542 -0
  12. haoline/eval/deployment.py +633 -0
  13. haoline/eval/schemas.py +833 -0
  14. haoline/examples/__init__.py +15 -0
  15. haoline/examples/basic_inspection.py +74 -0
  16. haoline/examples/compare_models.py +117 -0
  17. haoline/examples/hardware_estimation.py +78 -0
  18. haoline/format_adapters.py +1001 -0
  19. haoline/formats/__init__.py +123 -0
  20. haoline/formats/coreml.py +250 -0
  21. haoline/formats/gguf.py +483 -0
  22. haoline/formats/openvino.py +255 -0
  23. haoline/formats/safetensors.py +273 -0
  24. haoline/formats/tflite.py +369 -0
  25. haoline/hardware.py +2307 -0
  26. haoline/hierarchical_graph.py +462 -0
  27. haoline/html_export.py +1573 -0
  28. haoline/layer_summary.py +769 -0
  29. haoline/llm_summarizer.py +465 -0
  30. haoline/op_icons.py +618 -0
  31. haoline/operational_profiling.py +1492 -0
  32. haoline/patterns.py +1116 -0
  33. haoline/pdf_generator.py +265 -0
  34. haoline/privacy.py +250 -0
  35. haoline/pydantic_models.py +241 -0
  36. haoline/report.py +1923 -0
  37. haoline/report_sections.py +539 -0
  38. haoline/risks.py +521 -0
  39. haoline/schema.py +523 -0
  40. haoline/streamlit_app.py +2024 -0
  41. haoline/tests/__init__.py +4 -0
  42. haoline/tests/conftest.py +123 -0
  43. haoline/tests/test_analyzer.py +868 -0
  44. haoline/tests/test_compare_visualizations.py +293 -0
  45. haoline/tests/test_edge_analysis.py +243 -0
  46. haoline/tests/test_eval.py +604 -0
  47. haoline/tests/test_format_adapters.py +460 -0
  48. haoline/tests/test_hardware.py +237 -0
  49. haoline/tests/test_hardware_recommender.py +90 -0
  50. haoline/tests/test_hierarchical_graph.py +326 -0
  51. haoline/tests/test_html_export.py +180 -0
  52. haoline/tests/test_layer_summary.py +428 -0
  53. haoline/tests/test_llm_patterns.py +540 -0
  54. haoline/tests/test_llm_summarizer.py +339 -0
  55. haoline/tests/test_patterns.py +774 -0
  56. haoline/tests/test_pytorch.py +327 -0
  57. haoline/tests/test_report.py +383 -0
  58. haoline/tests/test_risks.py +398 -0
  59. haoline/tests/test_schema.py +417 -0
  60. haoline/tests/test_tensorflow.py +380 -0
  61. haoline/tests/test_visualizations.py +316 -0
  62. haoline/universal_ir.py +856 -0
  63. haoline/visualizations.py +1086 -0
  64. haoline/visualize_yolo.py +44 -0
  65. haoline/web.py +110 -0
  66. haoline-0.3.0.dist-info/METADATA +471 -0
  67. haoline-0.3.0.dist-info/RECORD +70 -0
  68. haoline-0.3.0.dist-info/WHEEL +4 -0
  69. haoline-0.3.0.dist-info/entry_points.txt +5 -0
  70. haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,265 @@
1
+ # Copyright (c) 2025 HaoLine Contributors
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ PDF generation for HaoLine using Playwright.
6
+
7
+ This module provides PDF generation from HTML reports using Playwright,
8
+ which renders the HTML with a real browser engine for high-quality output.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import logging
15
+ import pathlib
16
+ from typing import TYPE_CHECKING
17
+
18
+ if TYPE_CHECKING:
19
+ from .report import InspectionReport
20
+
21
+ # Check for Playwright availability
22
+ _HAS_PLAYWRIGHT = False
23
+ try:
24
+ from playwright.async_api import async_playwright
25
+
26
+ _HAS_PLAYWRIGHT = True
27
+ except ImportError:
28
+ pass
29
+
30
+
31
def is_available() -> bool:
    """Report whether the optional Playwright import succeeded at module load."""
    playwright_ready = _HAS_PLAYWRIGHT
    return playwright_ready
34
+
35
+
36
+ class PDFGenerator:
37
+ """
38
+ Generate PDF reports from HTML using Playwright.
39
+
40
+ Playwright provides high-quality PDF rendering using Chromium,
41
+ ensuring consistent output across platforms.
42
+ """
43
+
44
+ def __init__(
45
+ self,
46
+ logger: logging.Logger | None = None,
47
+ page_format: str = "A4",
48
+ landscape: bool = False,
49
+ print_background: bool = True,
50
+ margin_top: str = "20mm",
51
+ margin_bottom: str = "20mm",
52
+ margin_left: str = "15mm",
53
+ margin_right: str = "15mm",
54
+ ):
55
+ """
56
+ Initialize PDF generator.
57
+
58
+ Args:
59
+ logger: Logger instance
60
+ page_format: Page format (A4, Letter, Legal, etc.)
61
+ landscape: Use landscape orientation
62
+ print_background: Include background colors/images
63
+ margin_top: Top margin (CSS units)
64
+ margin_bottom: Bottom margin (CSS units)
65
+ margin_left: Left margin (CSS units)
66
+ margin_right: Right margin (CSS units)
67
+ """
68
+ self.logger = logger or logging.getLogger("haoline.pdf")
69
+ self.page_format = page_format
70
+ self.landscape = landscape
71
+ self.print_background = print_background
72
+ self.margin = {
73
+ "top": margin_top,
74
+ "bottom": margin_bottom,
75
+ "left": margin_left,
76
+ "right": margin_right,
77
+ }
78
+
79
+ async def _generate_pdf_async(
80
+ self,
81
+ html_content: str,
82
+ output_path: pathlib.Path,
83
+ ) -> bool:
84
+ """
85
+ Async implementation of PDF generation.
86
+
87
+ Args:
88
+ html_content: HTML string to convert
89
+ output_path: Path for output PDF
90
+
91
+ Returns:
92
+ True if successful, False otherwise
93
+ """
94
+ if not _HAS_PLAYWRIGHT:
95
+ self.logger.error(
96
+ "Playwright not installed. Install with: pip install playwright && playwright install chromium"
97
+ )
98
+ return False
99
+
100
+ try:
101
+ async with async_playwright() as p:
102
+ # Launch headless Chromium
103
+ browser = await p.chromium.launch(headless=True)
104
+ page = await browser.new_page()
105
+
106
+ # Set the HTML content
107
+ await page.set_content(html_content, wait_until="networkidle")
108
+
109
+ # Add custom CSS for better PDF rendering with smart page breaks
110
+ await page.add_style_tag(
111
+ content="""@media print {
112
+ body { -webkit-print-color-adjust: exact !important; print-color-adjust: exact !important; }
113
+ .no-print, button, .toggle-btn, .search-box { display: none !important; }
114
+ pre, code { white-space: pre-wrap !important; word-wrap: break-word !important; max-width: 100% !important; overflow-wrap: break-word !important; }
115
+ p, li { orphans: 3; widows: 3; }
116
+ h1, h2, h3, h4, h5, h6 { page-break-after: avoid !important; break-after: avoid !important; }
117
+ section { page-break-inside: avoid; break-inside: avoid; }
118
+ .kv-cache, .memory-breakdown, .visualizations, .graph-section, .layer-summary, .architecture, .hardware, .risks, .batch-scaling, .resolution-scaling { page-break-before: always !important; break-before: page !important; }
119
+ .executive-summary, .metrics-cards, .param-details, .dataset-info, .system-requirements { page-break-inside: avoid !important; break-inside: avoid !important; }
120
+ table { page-break-inside: avoid !important; break-inside: avoid !important; }
121
+ tr { page-break-inside: avoid !important; break-inside: avoid !important; }
122
+ figure, .chart-container, .visualization-item { page-break-inside: avoid !important; break-inside: avoid !important; }
123
+ img { page-break-inside: avoid !important; break-inside: avoid !important; max-width: 100% !important; height: auto !important; }
124
+ .metric-card, .card { page-break-inside: avoid !important; break-inside: avoid !important; }
125
+ .risk-item, .risk-signal { page-break-inside: avoid !important; break-inside: avoid !important; }
126
+ .comparison-table, .variant-table { page-break-inside: avoid !important; }
127
+ .engine-panel, .summary-panel { page-break-inside: avoid !important; break-inside: avoid !important; }
128
+ .recommendation, .calibration-rec { page-break-inside: avoid !important; break-inside: avoid !important; }
129
+ }"""
130
+ )
131
+
132
+ # Wait for any images to load
133
+ await page.wait_for_load_state("networkidle")
134
+
135
+ # Generate PDF
136
+ await page.pdf(
137
+ path=str(output_path),
138
+ format=self.page_format,
139
+ landscape=self.landscape,
140
+ print_background=self.print_background,
141
+ margin=self.margin,
142
+ display_header_footer=True,
143
+ header_template='<div style="font-size: 9px; color: #666; width: 100%; text-align: center; padding: 5px 0;">HaoLine Report</div>',
144
+ footer_template='<div style="font-size: 9px; color: #666; width: 100%; text-align: center; padding: 5px 0;"><span class="pageNumber"></span> / <span class="totalPages"></span></div>',
145
+ )
146
+
147
+ await browser.close()
148
+ return True
149
+
150
+ except Exception as e:
151
+ self.logger.error(f"PDF generation failed: {e}")
152
+ return False
153
+
154
+ def generate_from_html(
155
+ self,
156
+ html_content: str,
157
+ output_path: pathlib.Path,
158
+ ) -> bool:
159
+ """
160
+ Generate PDF from HTML content.
161
+
162
+ Args:
163
+ html_content: HTML string to convert
164
+ output_path: Path for output PDF
165
+
166
+ Returns:
167
+ True if successful, False otherwise
168
+ """
169
+ output_path = pathlib.Path(output_path)
170
+ output_path.parent.mkdir(parents=True, exist_ok=True)
171
+
172
+ self.logger.info(f"Generating PDF: {output_path}")
173
+
174
+ # Run async function
175
+ try:
176
+ loop = asyncio.get_event_loop()
177
+ except RuntimeError:
178
+ loop = asyncio.new_event_loop()
179
+ asyncio.set_event_loop(loop)
180
+
181
+ return loop.run_until_complete(self._generate_pdf_async(html_content, output_path))
182
+
183
+ def generate_from_html_file(
184
+ self,
185
+ html_path: pathlib.Path,
186
+ output_path: pathlib.Path,
187
+ ) -> bool:
188
+ """
189
+ Generate PDF from an HTML file.
190
+
191
+ Args:
192
+ html_path: Path to HTML file
193
+ output_path: Path for output PDF
194
+
195
+ Returns:
196
+ True if successful, False otherwise
197
+ """
198
+ html_path = pathlib.Path(html_path)
199
+ if not html_path.exists():
200
+ self.logger.error(f"HTML file not found: {html_path}")
201
+ return False
202
+
203
+ html_content = html_path.read_text(encoding="utf-8")
204
+ return self.generate_from_html(html_content, output_path)
205
+
206
+ def generate_from_report(
207
+ self,
208
+ report: InspectionReport,
209
+ output_path: pathlib.Path,
210
+ image_paths: dict[str, pathlib.Path] | None = None,
211
+ ) -> bool:
212
+ """
213
+ Generate PDF directly from an InspectionReport.
214
+
215
+ Args:
216
+ report: InspectionReport instance
217
+ output_path: Path for output PDF
218
+ image_paths: Optional dict of image paths for visualizations
219
+
220
+ Returns:
221
+ True if successful, False otherwise
222
+ """
223
+ # Generate HTML with embedded images (for PDF, all images are base64)
224
+ html_content = report.to_html(image_paths=image_paths)
225
+ return self.generate_from_html(html_content, output_path)
226
+
227
+
228
async def generate_pdf_async(
    html_content: str,
    output_path: pathlib.Path,
    **kwargs,
) -> bool:
    """
    Convenience async function for PDF generation.

    Mirrors the synchronous ``generate_pdf`` helper: the output path is
    normalised to a ``pathlib.Path`` and its parent directory is created
    before rendering, so both entry points accept the same arguments.

    Args:
        html_content: HTML string to convert
        output_path: Path for output PDF
        **kwargs: Additional options for PDFGenerator

    Returns:
        True if successful, False otherwise
    """
    target = pathlib.Path(output_path)
    # Rendering writes straight to this path, so the directory must exist.
    target.parent.mkdir(parents=True, exist_ok=True)

    generator = PDFGenerator(**kwargs)
    return await generator._generate_pdf_async(html_content, target)
246
+
247
+
248
def generate_pdf(
    html_content: str,
    output_path: pathlib.Path,
    **kwargs,
) -> bool:
    """
    Render an HTML string to a PDF file in one call.

    Args:
        html_content: HTML string to convert
        output_path: Path for output PDF
        **kwargs: Forwarded unchanged to the PDFGenerator constructor

    Returns:
        True if successful, False otherwise
    """
    pdf_builder = PDFGenerator(**kwargs)
    succeeded = pdf_builder.generate_from_html(html_content, output_path)
    return succeeded
haoline/privacy.py ADDED
@@ -0,0 +1,250 @@
1
+ """
2
+ HaoLine Privacy Utilities.
3
+
4
+ Functions for redacting sensitive information from model analysis reports.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+
12
def create_name_mapping(names: set[str]) -> dict[str, str]:
    """
    Build a reproducible original-name -> anonymized-name lookup.

    Each name receives ``<prefix>_<NNNN>``, where the prefix comes from
    ``_infer_prefix`` and the zero-padded number counts names sharing that
    prefix.

    Args:
        names: Set of original names to anonymize.

    Returns:
        Dictionary mapping original names to anonymized names.
    """
    anonymized: dict[str, str] = {}
    last_index: dict[str, int] = {}

    # Walking the names in sorted order makes the numbering independent of
    # set iteration order.
    for original in sorted(names):
        kind = _infer_prefix(original)
        sequence = last_index.get(kind, 0) + 1
        last_index[kind] = sequence
        anonymized[original] = f"{kind}_{sequence:04d}"

    return anonymized
36
+
37
+
38
+ def _infer_prefix(name: str) -> str:
39
+ """Infer an anonymized prefix based on the original name pattern."""
40
+ name_lower = name.lower()
41
+
42
+ # Common ONNX/model patterns
43
+ if any(x in name_lower for x in ["conv", "cnn"]):
44
+ return "conv"
45
+ if any(x in name_lower for x in ["bn", "batchnorm", "batch_norm"]):
46
+ return "bn"
47
+ if any(x in name_lower for x in ["relu", "gelu", "silu", "activation"]):
48
+ return "act"
49
+ if any(x in name_lower for x in ["fc", "linear", "dense", "gemm", "matmul"]):
50
+ return "linear"
51
+ if any(x in name_lower for x in ["attention", "attn", "self_attn"]):
52
+ return "attn"
53
+ if any(x in name_lower for x in ["embed", "embedding"]):
54
+ return "embed"
55
+ if any(x in name_lower for x in ["norm", "layernorm", "layer_norm"]):
56
+ return "norm"
57
+ if any(x in name_lower for x in ["pool", "avgpool", "maxpool"]):
58
+ return "pool"
59
+ if any(x in name_lower for x in ["reshape", "view", "flatten"]):
60
+ return "reshape"
61
+ if any(x in name_lower for x in ["concat", "cat"]):
62
+ return "concat"
63
+ if any(x in name_lower for x in ["add", "sum"]):
64
+ return "add"
65
+ if any(x in name_lower for x in ["mul", "multiply"]):
66
+ return "mul"
67
+ if any(x in name_lower for x in ["split", "chunk"]):
68
+ return "split"
69
+ if any(x in name_lower for x in ["transpose", "permute"]):
70
+ return "transpose"
71
+ if any(x in name_lower for x in ["weight", "bias", "param"]):
72
+ return "param"
73
+ if any(x in name_lower for x in ["input", "inp"]):
74
+ return "input"
75
+ if any(x in name_lower for x in ["output", "out"]):
76
+ return "output"
77
+
78
+ # Default
79
+ return "node"
80
+
81
+
82
def collect_names_from_dict(data: dict[str, Any]) -> set[str]:
    """
    Gather every value or key in a nested report dictionary that the
    recursive scanner treats as a layer/tensor name.

    Args:
        data: Dictionary to scan (typically from report.to_dict()).

    Returns:
        Set of potential names to anonymize.
    """
    found: set[str] = set()
    # The helper fills the set in place while walking the structure.
    _collect_names_recursive(data, found)
    return found
95
+
96
+
97
+ def _collect_names_recursive(obj: Any, names: set[str], key: str = "") -> None:
98
+ """Recursively collect names from nested structures."""
99
+ if isinstance(obj, dict):
100
+ for k, v in obj.items():
101
+ # Keys that typically contain names
102
+ if k in (
103
+ "name",
104
+ "node_name",
105
+ "layer_name",
106
+ "tensor_name",
107
+ "op_name",
108
+ "input_name",
109
+ "output_name",
110
+ ):
111
+ if isinstance(v, str):
112
+ names.add(v)
113
+ # Keys that map names to values
114
+ elif k in (
115
+ "by_node",
116
+ "by_name",
117
+ "input_shapes",
118
+ "output_shapes",
119
+ "shared_weights",
120
+ ):
121
+ if isinstance(v, dict):
122
+ names.update(v.keys())
123
+ # Lists like largest_weights, largest_activations
124
+ elif k in ("largest_weights", "largest_activations"):
125
+ if isinstance(v, list):
126
+ for item in v:
127
+ if isinstance(item, dict) and "name" in item:
128
+ names.add(item["name"])
129
+ elif isinstance(item, (list, tuple)) and len(item) >= 1:
130
+ if isinstance(item[0], str):
131
+ names.add(item[0])
132
+
133
+ _collect_names_recursive(v, names, k)
134
+
135
+ elif isinstance(obj, list):
136
+ for item in obj:
137
+ _collect_names_recursive(item, names, key)
138
+
139
+
140
def redact_dict(
    data: dict[str, Any],
    mapping: dict[str, str],
) -> dict[str, Any]:
    """
    Return a redacted copy of a dictionary (typically from report.to_dict()).

    Args:
        data: Dictionary to redact.
        mapping: Mapping from original names to anonymized names.

    Returns:
        New dictionary with names replaced.
    """
    redacted = _redact_recursive(data, mapping)
    # The recursive helper is typed as returning Any; a dict input yields a
    # dict, and this check narrows the type for callers.
    assert isinstance(redacted, dict)
    return redacted
158
+
159
+
160
+ def _redact_recursive(obj: Any, mapping: dict[str, str]) -> Any:
161
+ """Recursively apply redaction to nested structures."""
162
+ if isinstance(obj, dict):
163
+ result = {}
164
+ for k, v in obj.items():
165
+ # Replace keys if they're in the mapping (for by_node, etc.)
166
+ new_key = mapping.get(k, k) if isinstance(k, str) else k
167
+ result[new_key] = _redact_recursive(v, mapping)
168
+ return result
169
+
170
+ elif isinstance(obj, list):
171
+ return [_redact_recursive(item, mapping) for item in obj]
172
+
173
+ elif isinstance(obj, str):
174
+ # Replace string values if they match a name
175
+ return mapping.get(obj, obj)
176
+
177
+ else:
178
+ return obj
179
+
180
+
181
def create_summary_only_dict(data: dict[str, Any]) -> dict[str, Any]:
    """
    Strip a report dictionary to summary-only (no per-layer details).

    The output keys appear in a fixed order: the pass-through fields, the
    trimmed nested sections, then ``risk_summary``,
    ``detected_block_counts`` and ``hardware_estimates`` when the
    corresponding input is present and non-empty.

    Args:
        data: Full report dictionary.

    Returns:
        Stripped dictionary with only aggregate stats.
    """
    # Fields to keep (aggregate only). A tuple, not a set, so that the key
    # order of the result is the same on every run.
    keep_fields = (
        "metadata",
        "generated_at",
        "autodoc_version",
        "architecture_type",
    )

    # Nested fields to summarize: section -> keys allowed through.
    summary_fields = {
        "graph_summary": ["num_nodes", "num_inputs", "num_outputs", "op_type_counts"],
        "param_counts": ["total", "trainable", "non_trainable", "is_quantized"],
        "flop_counts": ["total"],
        "memory_estimates": ["weights_bytes", "activations_bytes", "total_bytes"],
    }

    # Hardware keys allowed through (no per-op breakdown).
    hardware_keys = (
        "latency_ms",
        "throughput_samples_per_sec",
        "estimated_power_w",
        "bottleneck_summary",
    )

    result: dict[str, Any] = {}

    # Copy allowed fields
    for field in keep_fields:
        if field in data:
            result[field] = data[field]

    # Extract summary from nested fields
    for field, allowed_keys in summary_fields.items():
        section = data.get(field)
        if section:
            result[field] = {k: section[k] for k in allowed_keys if k in section}

    # Add aggregate risk info without details
    risk_signals = data.get("risk_signals")
    if risk_signals:
        severity_counts = {"high": 0, "medium": 0, "low": 0}
        # Single pass over the signals; other severities only count towards
        # the total.
        for signal in risk_signals:
            level = signal.get("severity")
            for bucket in severity_counts:
                if level == bucket:
                    severity_counts[bucket] += 1
        result["risk_summary"] = {"total_risks": len(risk_signals), **severity_counts}

    # Add detected block counts without names
    detected_blocks = data.get("detected_blocks")
    if detected_blocks:
        block_counts: dict[str, int] = {}
        for block in detected_blocks:
            block_type = block.get("block_type", "unknown")
            block_counts[block_type] = block_counts.get(block_type, 0) + 1
        result["detected_block_counts"] = block_counts

    # Add hardware summary without per-op breakdown
    hw = data.get("hardware_estimates")
    if hw:
        result["hardware_estimates"] = {k: hw[k] for k in hardware_keys if k in hw}

    return result